diff --git a/.gitattributes b/.gitattributes index 55efb38eafb56f2dd0719200f81894ef8245422e..fd59719344a24a94d0ed5853e2b89641648669d7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -225,3 +225,27 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text 8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text 8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text 8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text +8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/8b7178b13b/3583606.err b/8b7178b13b/3583606.err new file mode 100644 index 0000000000000000000000000000000000000000..2c8cead7e211f5dd7e8b51c1ee7fc5735594cd61 --- /dev/null +++ b/8b7178b13b/3583606.err @@ -0,0 +1,3643 @@ +12: 2023-05-25 13:35:25.939725: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.939735: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.939752: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: 2023-05-25 13:35:25.940137: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.940167: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.940120: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.940138: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.940172: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.939790: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.939845: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.939960: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.939965: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: 2023-05-25 13:35:25.940056: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.940073: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.940082: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: 2023-05-25 13:35:25.940203: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.940229: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.940237: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: 2023-05-25 13:35:25.939860: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.939884: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.940177: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.940219: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.940233: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: 2023-05-25 13:35:25.940270: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.940295: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.940311: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.939984: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.940016: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.940138: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.940179: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.939887: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.940347: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.940376: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.940405: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: 2023-05-25 13:35:25.940025: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.940076: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.940242: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.940250: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.940188: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.940200: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.939969: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.940259: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.940239: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940336: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940358: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940345: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: 2023-05-25 13:35:25.940087: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.940315: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.940216: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.940289: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.940262: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.940339: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940391: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940409: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.940335: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.940338: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940602: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940613: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940627: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: 2023-05-25 13:35:25.940428: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940435: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940456: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: 2023-05-25 13:35:25.940459: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940474: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940484: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: 2023-05-25 13:35:25.940356: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.940365: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.940102: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940633: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940647: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940487: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940504: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.940414: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.940125: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.940142: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.940135: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: 2023-05-25 13:35:25.940800: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.940825: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.940826: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: 2023-05-25 13:35:25.940657: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940507: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.940145: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940711: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940509: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940725: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940615: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.941001: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.941000: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.941066: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.940858: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.940897: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.940917: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.940414: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.940385: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.940447: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.940449: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.940471: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941095: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941111: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941114: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.940918: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.941035: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.941057: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.941064: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: 2023-05-25 13:35:25.940945: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.940948: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.940952: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: 2023-05-25 13:35:25.941147: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.941163: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.941211: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.940973: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.941369: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.941224: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.941238: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.941274: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941151: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.941422: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.941326: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941176: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941180: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941199: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: 2023-05-25 13:35:25.941272: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.941274: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.941282: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.941087: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.941097: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.941333: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.941109: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.941081: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.941113: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: 2023-05-25 13:35:25.941212: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.941350: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.941360: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.941369: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: 2023-05-25 13:35:25.941551: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.941568: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.941585: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.941316: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.941334: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.941343: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.941314: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.941363: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.941395: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.941421: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.941646: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.941646: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.941694: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: 2023-05-25 13:35:25.941426: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.941432: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.941455: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.941591: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.941626: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.941644: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.941717: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.941727: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.941663: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.941650: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.941730: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.941888: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.941907: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.941913: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: 2023-05-25 13:35:25.941695: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.941815: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941721: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941753: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941769: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: 2023-05-25 13:35:25.941948: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.941956: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.941986: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941810: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941821: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.941889: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.941897: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.941919: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.941952: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.942012: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941692: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941733: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941769: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: 2023-05-25 13:35:25.941941: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.941949: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.942007: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: 2023-05-25 13:35:25.941850: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941895: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941915: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: 2023-05-25 13:35:25.942029: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.942036: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.942023: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.941926: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.941942: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941818: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941814: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.942022: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.942038: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.941925: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.941954: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941839: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941846: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941893: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: 2023-05-25 13:35:25.942042: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.942002: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.942007: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.942117: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942177: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942200: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942174: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.941942: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.941951: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.941987: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.941982: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.942002: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.942466: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.942475: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.942488: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942236: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942245: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942264: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942300: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942307: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.942511: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.942516: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.942540: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.942544: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.942554: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943091: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943115: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943086: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943088: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943152: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943160: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943185: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943198: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.943892: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.943914: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.943898: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.943934: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.943959: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.943961: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.943971: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.943983: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.944179: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.944178: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.944188: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.944209: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.944207: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.944218: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.944226: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.944217: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: 2023-05-25 13:35:25.944225: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.944212: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.944238: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.944225: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.944249: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.944250: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.944251: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.944303: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:38.218219: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218040: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.218262: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218078: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.218286: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:35:38.218328: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.218301: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218144: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.218278: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218178: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.218301: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.218327: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218128: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.218320: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:35:38.218358: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.218349: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218156: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.218335: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:35:38.218376: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.218357: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218171: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.218349: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:35:38.218397: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.218384: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218175: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.218818: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.218421: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.218377: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218788: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.218825: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.218838: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.218862: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.218871: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.218416: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.218297: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.218371: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218881: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.218881: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.218885: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.218890: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.218892: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.218410: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.218389: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218879: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.218895: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.218900: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.218917: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.218417: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.218942: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.219041: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.218326: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.218339: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.219009: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.219026: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.218357: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.218370: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.218380: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.219038: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.219061: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.219070: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.219082: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.219086: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.219100: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.218391: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.219052: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.219057: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.219060: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.219065: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.219066: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.218395: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:35:38.219103: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.219107: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.219258: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.219279: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.219285: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.219303: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.219307: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.219309: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.219316: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.219329: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.219800: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.219817: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.219834: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.219846: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.219846: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.219849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.219862: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.219864: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.220353: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.220371: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.220383: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.220395: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.220408: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.220410: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.220420: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.220427: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.220743: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.220762: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.220785: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.220791: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.220804: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.220805: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.220807: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.220817: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.221283: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.221305: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.221312: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.221326: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.221329: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.221336: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.221335: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.221348: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.221934: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.221981: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.221952: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.222000: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.221964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.222037: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.221984: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.222035: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.221990: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.222008: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.221984: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.222034: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.221994: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.222048: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.222005: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.222052: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.222519: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.222536: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.222550: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.222558: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.222554: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.222567: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.222571: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.222144: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.222563: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.222573: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.222580: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.222577: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.222580: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.222584: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.222159: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:35:38.222174: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:35:38.222184: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:35:38.222189: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.222589: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.222590: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.222598: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.222204: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:35:38.222195: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:35:38.222207: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:35:38.222688: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.222704: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.222716: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.222736: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.222740: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.222745: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.222753: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.222755: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +21: 2023-05-25 13:35:38.224052: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.224073: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.224110: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.224145: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.224123: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.224128: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.224110: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.224131: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.224574: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +21: 2023-05-25 13:35:38.224594: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +21: 2023-05-25 13:35:38.224617: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +21: 2023-05-25 13:35:38.224629: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +21: 2023-05-25 13:35:38.224651: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +21: 2023-05-25 13:35:38.224656: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +21: 2023-05-25 13:35:38.224659: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +21: 2023-05-25 13:35:38.224668: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.225347: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.225366: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.225378: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.225418: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.225424: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.225413: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.225429: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.225397: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.225830: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.225851: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.225864: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.225874: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.225872: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.225887: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.225893: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.225895: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.226465: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.226479: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.226492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.226504: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.226509: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.226515: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.226497: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.226518: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.227016: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.226779: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.227035: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.227047: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.226795: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.227052: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.226806: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.227061: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.226819: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.226831: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.226823: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.226829: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.226838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.227073: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.227293: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.227076: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.227078: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.227311: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.227327: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.227330: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.227339: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.227033: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.227346: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.227349: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.227351: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.227053: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.227083: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.227101: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.227123: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.227131: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.227111: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.227072: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.227503: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.227517: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.227520: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.227526: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.227536: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.227551: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.227561: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.227563: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.227384: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.227407: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.227426: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.227458: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.227517: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.227449: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.227463: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.227444: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.227538: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.227457: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.227562: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.227878: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.227585: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.227594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.227592: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.227578: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.227905: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.227910: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.227924: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.227925: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.227936: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.227545: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.228009: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.227939: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.227942: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.228032: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.228035: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.228040: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.228062: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.228065: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.228068: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.228071: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.234531: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.234571: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.234591: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.234621: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.234610: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.234631: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.234641: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.234646: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.235162: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.235174: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.235215: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.235224: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.235234: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.235253: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.235259: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.235267: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.235929: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.235951: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.235961: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.235975: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.235984: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.235970: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.235992: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.235997: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.236460: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.236479: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.236492: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.236498: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.236503: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.236507: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.236509: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.236525: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.240068: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240061: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240123: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240144: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240157: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240115: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.240170: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240164: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.240199: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240185: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.240218: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240620: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240242: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240643: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.240652: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240254: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240671: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.240682: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240235: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240688: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240280: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.240259: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.240698: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240764: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.240718: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240781: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240796: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240802: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240810: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240823: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240838: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.240840: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.241882: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.241911: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.241934: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.241960: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.241947: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.241967: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.241977: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.241979: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.242483: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.242503: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.242512: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.242525: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.242532: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.242533: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.242538: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.242551: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.256402: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.256430: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.256452: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.256461: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.256462: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.256437: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.256473: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.256484: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.256984: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.257004: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.257028: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.257040: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.257064: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.257067: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.257071: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.257083: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.256911: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.256930: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.256970: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.256975: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.256988: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.256964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.256997: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.256975: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.257429: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.257448: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.257459: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.257472: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.257482: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.257484: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.257487: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.257499: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.258720: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.258737: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.258758: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.258789: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.258800: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.258804: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.258807: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.258800: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.259195: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.259216: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.259223: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.259228: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.259252: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.259029: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.259258: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.259262: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.259266: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.259056: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.259061: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.259085: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.259079: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.259089: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259059: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.259105: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.259103: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259083: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259093: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259106: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259112: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259116: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259123: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259128: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.259552: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.259559: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.259586: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.259592: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.259599: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.259615: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259637: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.259619: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.259626: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259663: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259670: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259698: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259698: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259709: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259715: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259721: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.259507: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.259475: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.259524: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.259540: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.259547: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.259494: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.259559: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.259515: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.259558: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.259522: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.259534: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.259532: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.259539: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.259543: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.260001: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.260021: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.260029: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.260041: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.260046: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.260048: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.260054: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.260059: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.259677: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.259788: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259710: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.259695: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.259700: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.259801: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.259713: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.259815: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259729: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.259720: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.259823: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259741: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.259721: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.259831: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259755: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.259727: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.259842: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259759: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.259733: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.259842: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259764: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.259848: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259770: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259780: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.260546: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.260553: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.260574: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.260497: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.260517: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 4: 2023-05-25 13:35:38.260517: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.260575: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.260580: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.260524: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.260591: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.260601: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.260537: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.260542: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.260548: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.260550: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.260607: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 4: 2023-05-25 13:35:38.260544: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 4: 2023-05-25 13:35:38.260547: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.260555: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 4: 2023-05-25 13:35:38.260563: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 4: 2023-05-25 13:35:38.260564: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 4: 2023-05-25 13:35:38.260572: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 4: 2023-05-25 13:35:38.260579: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 4: 2023-05-25 13:35:38.260584: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.259572: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.259577: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.260032: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260048: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260063: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260067: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260085: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260092: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260097: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260099: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:36:07.952217: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.952241: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.952247: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.952416: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.952259: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.952436: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.952263: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.952440: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.952260: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.952459: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.952273: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.952464: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.952285: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.952462: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.952468: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.952479: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.952702: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.952723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.952740: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.952755: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.952818: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.952805: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.952762: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.952768: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.952842: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.952825: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.952770: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.952852: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.952849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.952772: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.952868: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.952857: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.952875: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.952877: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.952935: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.952882: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.953030: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.952881: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.952887: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.953051: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.952891: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.952950: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.952886: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.953070: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.952894: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.952963: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.953091: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.952984: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.953100: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.952982: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.953104: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.952990: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.953107: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.952992: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.953110: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.953000: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.953255: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.953283: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.953295: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.953312: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.953316: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.953326: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.953331: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.953346: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.953498: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.953519: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.953539: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.953560: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.953573: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953509: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.953576: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953528: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.953571: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953527: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.953587: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953534: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953544: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.953569: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953546: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953553: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.953588: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.953600: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.953604: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.953610: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.953712: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.953625: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.953623: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.953627: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.953732: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.953744: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.953756: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.953769: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.953769: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.953777: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.953781: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.953904: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.953894: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.953924: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.953949: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.953956: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.953974: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.953977: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.953913: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.953978: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.953922: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.953988: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.953931: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.953939: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.953939: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.953951: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.953952: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.954344: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.954367: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.954372: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.954387: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.954390: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.954385: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.954406: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.954409: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955367: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955381: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955391: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955410: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955418: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955424: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955536: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955536: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.955686: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.955708: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.955696: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.955755: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.955715: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.955729: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.955797: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.955726: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.955733: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.955715: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.955801: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.955742: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.955723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.955823: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.955746: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.955739: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.955826: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.955737: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.955832: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.955752: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.955827: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.955850: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.955754: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.955846: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.955755: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.955861: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.955900: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.955915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.955944: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.955943: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.955949: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.955967: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956292: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956310: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956326: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.956412: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956336: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956347: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.956428: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956351: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.956450: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956357: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.956457: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956359: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.956462: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.956492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.956636: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.956648: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.956634: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.956805: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.956802: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.956667: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.956840: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.956878: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.956689: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.956842: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.956872: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.956701: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.956876: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.956868: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.956718: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.956885: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.956888: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.956731: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.956891: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.956892: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.956741: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.956904: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.956899: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.956753: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.956917: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.956907: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.985817: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.985821: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.985826: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.985835: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:07.985830: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.985836: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:07.985832: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.985832: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.985834: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.985849: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:07.985860: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:07.985863: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:07.985864: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:07.985866: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:07.986133: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:07.986162: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986524: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986526: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.986480: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986527: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.986482: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986532: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986543: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:07.986482: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986544: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986535: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.986714: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.986482: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986725: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986662: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986680: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986537: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986546: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:07.986484: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.986603: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986729: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986678: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986701: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986534: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.986718: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.986484: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986733: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986684: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986540: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986557: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:07.986690: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.986725: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.986488: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.986605: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986731: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.986694: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986684: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986564: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986566: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986568: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.986729: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.986488: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.986517: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986606: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.986796: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986737: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986750: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:07.986699: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986692: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986569: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.986731: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:07.986517: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:07.986521: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:07.986522: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986610: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.986796: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986739: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.986705: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986692: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.986735: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.986745: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:07.986524: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:07.986527: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:07.986527: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986610: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.986802: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986754: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986741: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.986705: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986696: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.986745: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.986756: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.986758: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:07.986525: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986616: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.986800: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986757: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986744: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.986703: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986712: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986725: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.986760: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.986760: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986610: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.986806: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986766: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986770: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986771: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:07.986710: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986724: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986727: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986731: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.986818: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.986622: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.986640: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.986817: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986774: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986776: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:07.986713: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986732: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986740: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.986817: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.986834: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986640: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986641: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986654: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.986818: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.986829: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:07.986716: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:07.986735: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.986836: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986654: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986657: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986656: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.986829: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.986829: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.986835: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:07.986733: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:07.986733: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:07.986744: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.986653: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.986833: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.986838: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.986838: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:07.986748: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:07.986746: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:07.986745: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.986889: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.986903: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:07.986749: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:07.987987: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.988016: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:07.988017: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.988018: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.988024: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.988020: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.988028: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.988032: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.988037: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:07.988072: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:07.988075: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:07.988078: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:07.988079: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:07.988081: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:07.988083: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:07.988083: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.988652: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.988657: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.988667: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.988672: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.988661: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.988668: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.988671: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.988672: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.988679: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.988692: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.988693: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.988695: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.988696: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.988697: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.989851: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.989855: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.989861: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.989859: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.989872: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.989877: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.989877: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.989890: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.989882: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.989889: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.989891: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.989898: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.989906: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.989909: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.989911: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.989915: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.990973: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.990982: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.991003: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.990988: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.991010: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.990994: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.990994: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.990999: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.991004: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.991004: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.991035: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.991044: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.991044: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.991046: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.991050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.991051: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:07.991497: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.991514: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:07.991516: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.991539: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:07.991527: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.991529: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.991533: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.991531: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.991539: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.991565: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:07.991566: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:07.991568: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:07.991569: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:07.991570: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:07.991583: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:07.991605: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.992248: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.992252: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.992259: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.992261: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.992276: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.992275: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.992262: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.992264: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.992261: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.992267: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.992283: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.992294: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.992297: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.992300: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.992301: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.992302: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.992587: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.992593: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.992599: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.992599: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.992607: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.992608: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.992604: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.992606: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.992606: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.992609: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.992630: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.992628: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.992636: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.992638: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.992640: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.992641: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.993067: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.993081: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.993088: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.993089: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.993102: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.993108: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.993093: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.993093: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.993102: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.993102: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.993130: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.993132: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.993137: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.993140: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.993142: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.993143: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.993531: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.993535: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.993538: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.993540: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.993544: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.993554: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.993555: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.993545: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.993545: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.993550: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.993564: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.993565: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.993569: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.993575: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993717: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.993575: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.993577: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993722: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993721: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993724: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.993861: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993734: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993725: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993732: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993750: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:07.993865: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993750: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993753: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993751: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:07.993870: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993751: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993753: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993758: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:07.993849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.993868: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993761: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:07.993957: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.993871: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.993870: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.993850: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.993877: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.993888: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:07.993960: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.993855: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.993880: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.993889: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:07.993959: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.993856: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.993892: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:07.993903: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:07.993904: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:07.993964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.993853: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:07.993902: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:07.993908: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:07.993911: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:07.993962: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.993854: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.993952: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.993983: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:07.993966: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.993982: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:07.993858: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.993979: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:07.993979: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:07.993984: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:07.993858: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.993887: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:07.993957: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.993973: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.993981: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:07.993888: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:07.993891: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:07.993894: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:07.993994: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:07.994001: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:07.993890: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:07.993894: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:07.993891: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +21: 2023-05-25 13:36:07.993971: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.994112: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:07.993887: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +21: 2023-05-25 13:36:07.993973: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.993960: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:07.994128: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +21: 2023-05-25 13:36:07.993976: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.993964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.993982: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.993960: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.993987: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.993970: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.993986: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.993962: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.993993: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.994025: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:07.993974: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.994027: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +21: 2023-05-25 13:36:07.994032: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +21: 2023-05-25 13:36:07.994032: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:07.993977: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:07.993994: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +21: 2023-05-25 13:36:07.994036: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +21: 2023-05-25 13:36:07.994038: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +21: 2023-05-25 13:36:07.994039: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:07.993996: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:07.993998: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:07.993999: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:07.994001: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:07.994001: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:07.994006: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:07.994004: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.997445: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997470: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997480: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997509: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997520: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997519: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997533: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997535: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998803: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998808: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998808: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998808: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998807: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998815: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998812: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998833: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998831: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998821: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998835: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998845: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998846: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998847: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998844: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998851: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.000503: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.000535: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.000568: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.000602: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.000615: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.000623: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.000625: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.000472: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.000830: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.000497: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.000514: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.000547: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.000549: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.000557: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.000562: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.000738: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.001702: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.001703: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.001706: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.001708: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.001711: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.001713: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.001715: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.001727: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.001727: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.001717: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.001727: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.001728: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.001727: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.001738: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.001740: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.001744: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.001907: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.001916: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.001922: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.001916: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.001920: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.001920: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.001931: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.001925: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.001924: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.001928: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.001939: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.001944: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.001945: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.001946: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.001948: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.001948: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.002562: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.002599: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.002617: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.002635: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.002640: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.002646: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.002653: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.002799: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.003066: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.003083: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.003089: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.003101: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.003112: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.003108: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.003117: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.003230: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.003600: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.003616: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.003617: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.003618: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.003617: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.003622: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.003622: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.003626: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.003622: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.003642: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.003642: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.003643: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.003645: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.003952: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.003973: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.003986: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004005: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004010: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004017: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004022: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004030: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.004180: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.004183: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.004186: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.004187: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.004186: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.004191: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.004191: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.004200: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:08.004200: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:08.004204: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:08.004204: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:08.004206: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:08.004198: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:08.004208: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:08.004210: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:08.004222: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.004979: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004980: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004980: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004982: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004984: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004981: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004981: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004988: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004995: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.004996: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.005004: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.005003: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.005005: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.005005: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.005007: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.005007: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:08.006968: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.006984: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.006995: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007000: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007012: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007017: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007024: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007027: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007868: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007883: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:08.007875: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007875: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007882: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007885: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007885: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007887: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007891: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:08.007900: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:08.007901: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:08.007905: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:08.007907: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:08.007907: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:08.007909: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:08.007911: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.988919: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.988933: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:07.986687: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.986692: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.986697: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.986702: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.986705: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.986720: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:07.986723: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:07.986723: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:07.986726: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:07.986730: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:07.986732: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:07.986725: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.986723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:07.986752: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:07.986754: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.003646: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.003647: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.003649: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: Successfully preprocessed all matching files. + 0: Detected CUDA files, patching ldflags + 0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... + 0: Building extension module scaled_upper_triang_masked_softmax_cuda... + 0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) + 0: Loading extension module scaled_upper_triang_masked_softmax_cuda... + 0: Successfully preprocessed all matching files. + 0: Detected CUDA files, patching ldflags + 0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... + 0: Building extension module scaled_masked_softmax_cuda... + 0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) + 0: Loading extension module scaled_masked_softmax_cuda... + 0: Successfully preprocessed all matching files. + 0: Detected CUDA files, patching ldflags + 0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... + 0: Building extension module fused_mix_prec_layer_norm_cuda... + 0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) + 0: Loading extension module fused_mix_prec_layer_norm_cuda... +28: Successfully preprocessed all matching files. +28: Successfully preprocessed all matching files. +28: Successfully preprocessed all matching files. +28: Successfully preprocessed all matching files. +28: Successfully preprocessed all matching files. +29: Successfully preprocessed all matching files. +29: Successfully preprocessed all matching files. + 4: Successfully preprocessed all matching files. + 4: Successfully preprocessed all matching files. +14: Successfully preprocessed all matching files. +14: Successfully preprocessed all matching files. +14: Successfully preprocessed all matching files. +18: Successfully preprocessed all matching files. +18: Successfully preprocessed all matching files. +31: Successfully preprocessed all matching files. +31: Successfully preprocessed all matching files. + 5: Successfully preprocessed all matching files. +15: Successfully preprocessed all matching files. +15: Successfully preprocessed all matching files. +13: Successfully preprocessed all matching files. +13: Successfully preprocessed all matching files. + 5: Successfully preprocessed all matching files. +19: Successfully preprocessed all matching files. +15: Successfully preprocessed all matching files. +13: Successfully preprocessed all matching files. +10: Successfully preprocessed all matching files. + 3: Successfully preprocessed all matching files. +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: +31: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +31: Building extension module utils... +31: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +31: Loading extension module utils... +19: Loading extension module utils... +19: Loading extension module utils... +18: Loading extension module utils... + 8: Loading extension module utils... + 8: Loading extension module utils... +18: Loading extension module utils... + 9: Loading extension module utils... +28: Loading extension module utils... +20: Loading extension module utils... +28: Loading extension module utils... +20: Loading extension module utils... +23: Loading extension module utils... +23: Loading extension module utils... +19: Loading extension module utils... +19: Loading extension module utils... +22: Loading extension module utils... + 9: Loading extension module utils... +21: Loading extension module utils... +22: Loading extension module utils... +21: Loading extension module utils... +27: Loading extension module utils... +27: Loading extension module utils... +10: Loading extension module utils... +10: Loading extension module utils... +16: Loading extension module utils... + 5: Loading extension module utils... + 5: Loading extension module utils... +13: Loading extension module utils... +13: Loading extension module utils... +14: Loading extension module utils... +14: Loading extension module utils... +16: Loading extension module utils... +23: Loading extension module utils... +17: Loading extension module utils... +15: Loading extension module utils... +20: Loading extension module utils... +17: Loading extension module utils... +23: Loading extension module utils... +20: Loading extension module utils... +15: Loading extension module utils... +12: Loading extension module utils... +19: Loading extension module utils... +12: Loading extension module utils... +11: Loading extension module utils... +22: Loading extension module utils... +18: Loading extension module utils... +18: Loading extension module utils... +11: Loading extension module utils... +21: Loading extension module utils... +19: Loading extension module utils... +22: Loading extension module utils... +21: Loading extension module utils... +30: Loading extension module utils... +31: Loading extension module utils... + 3: Loading extension module utils... +30: Loading extension module utils... + 3: Loading extension module utils... +26: Loading extension module utils... +26: Loading extension module utils... + 8: Loading extension module utils... + 8: Loading extension module utils... +24: Loading extension module utils... +10: Loading extension module utils... + 8: Loading extension module utils... +14: Loading extension module utils... + 9: Loading extension module utils... +10: Loading extension module utils... +14: Loading extension module utils... +10: Loading extension module utils... +10: Loading extension module utils... + 9: Loading extension module utils... +14: Loading extension module utils... +14: Loading extension module utils... +13: Loading extension module utils... +13: Loading extension module utils... + 8: Loading extension module utils... +13: Loading extension module utils... +15: Loading extension module utils... +11: Loading extension module utils... + 8: Loading extension module utils... +13: Loading extension module utils... +15: Loading extension module utils... +15: Loading extension module utils... +15: Loading extension module utils... + 8: Loading extension module utils... +25: Loading extension module utils... +25: Loading extension module utils... +24: Loading extension module utils... +12: Loading extension module utils... +12: Loading extension module utils... +12: Loading extension module utils... +11: Loading extension module utils... +12: Loading extension module utils... +11: Loading extension module utils... + 9: Loading extension module utils... +11: Loading extension module utils... + 1: Loading extension module utils... + 9: Loading extension module utils... + 4: Loading extension module utils... + 4: Loading extension module utils... + 7: Loading extension module utils... + 7: Loading extension module utils... + 9: Loading extension module utils... + 9: Loading extension module utils... +29: Loading extension module utils... +29: Loading extension module utils... + 1: Loading extension module utils... +13: Loading extension module utils... +13: Loading extension module utils... +10: Loading extension module utils... +12: Loading extension module utils... +12: Loading extension module utils... +11: Loading extension module utils... + 2: Loading extension module utils... +11: Loading extension module utils... +15: Loading extension module utils... +15: Loading extension module utils... +10: Loading extension module utils... +14: Loading extension module utils... +14: Loading extension module utils... +16: Loading extension module utils... + 0: Loading extension module utils... + 0: Loading extension module utils... + 2: Loading extension module utils... +20: Loading extension module utils... +17: Loading extension module utils... +23: Loading extension module utils... +20: Loading extension module utils... +23: Loading extension module utils... +17: Loading extension module utils... +16: Loading extension module utils... +18: Loading extension module utils... +18: Loading extension module utils... +22: Loading extension module utils... +22: Loading extension module utils... +19: Loading extension module utils... +19: Loading extension module utils... +21: Loading extension module utils... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Loading extension module utils... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +25: Building extension module utils... +25: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +14: +14: Loading extension module utils...Loading extension module utils... +14: +14: No modifications detected for re-loaded extension module utils, skipping build step... +14: Loading extension module utils... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +14: +14: Loading extension module utils... +14: Loading extension module utils... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: No modifications detected for re-loaded extension module utils, skipping build step... +14: Loading extension module utils... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: No modifications detected for re-loaded extension module utils, skipping build step... +14: Loading extension module utils... +14: No modifications detected for re-loaded extension module utils, skipping build step... +14: Loading extension module utils... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: +10: +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: No modifications detected for re-loaded extension module utils, skipping build step... +10: Loading extension module utils... +10: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +10: +10: Loading extension module utils...Loading extension module utils... +10: +10: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +10: +10: +10: Loading extension module utils...Loading extension module utils...Loading extension module utils... +10: +10: +10: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +10: +10: Loading extension module utils...Loading extension module utils... +10: +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: Loading extension module utils... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: Loading extension module utils... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... +21: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +21: +21: Loading extension module utils...Loading extension module utils... +21: +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: No modifications detected for re-loaded extension module utils, skipping build step... +31: Loading extension module utils... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: No modifications detected for re-loaded extension module utils, skipping build step... +31: Loading extension module utils... +19: No modifications detected for re-loaded extension module utils, skipping build step... +19: Loading extension module utils... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: No modifications detected for re-loaded extension module utils, skipping build step... +19: Loading extension module utils... +19: No modifications detected for re-loaded extension module utils, skipping build step... +19: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +19: +19: Loading extension module utils... +19: No modifications detected for re-loaded extension module utils, skipping build step...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: +19: Loading extension module utils... +19: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +19: +19: Loading extension module utils...Loading extension module utils... +19: +19: No modifications detected for re-loaded extension module utils, skipping build step... +19: Loading extension module utils... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +16: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +16: +16: Loading extension module utils...Loading extension module utils... +16: +16: No modifications detected for re-loaded extension module utils, skipping build step... +16: Loading extension module utils... +16: No modifications detected for re-loaded extension module utils, skipping build step... +16: Loading extension module utils... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: No modifications detected for re-loaded extension module utils, skipping build step... +15: Loading extension module utils... +15: No modifications detected for re-loaded extension module utils, skipping build step... +15: Loading extension module utils... +15: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +15: +15: +15: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils...Loading extension module utils...Loading extension module utils... +15: +15: +15: No modifications detected for re-loaded extension module utils, skipping build step... +15: Loading extension module utils... +15: +15: Loading extension module utils... +15: No modifications detected for re-loaded extension module utils, skipping build step... +15: Loading extension module utils... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... + 9: No modifications detected for re-loaded extension module utils, skipping build step... + 9: Loading extension module utils... + 9: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 9: + 9: Loading extension module utils...Loading extension module utils... + 9: + 9: No modifications detected for re-loaded extension module utils, skipping build step... + 9: Loading extension module utils... + 9: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 9: + 9: + 9: Loading extension module utils...Loading extension module utils...Loading extension module utils... + 9: + 9: + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: No modifications detected for re-loaded extension module utils, skipping build step... + 9: Loading extension module utils... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: No modifications detected for re-loaded extension module utils, skipping build step... +25: Loading extension module utils... +25: No modifications detected for re-loaded extension module utils, skipping build step... +25: Loading extension module utils... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: +23: +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: No modifications detected for re-loaded extension module utils, skipping build step... +12: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +12: +12: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +12: +12: Loading extension module utils... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +12: +12: Loading extension module utils...Loading extension module utils... +12: +12: No modifications detected for re-loaded extension module utils, skipping build step... +12: Loading extension module utils... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: No modifications detected for re-loaded extension module utils, skipping build step... +12: Loading extension module utils... +12: No modifications detected for re-loaded extension module utils, skipping build step... +12: Loading extension module utils... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... + 8: No modifications detected for re-loaded extension module utils, skipping build step...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: + 8: Loading extension module utils... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: No modifications detected for re-loaded extension module utils, skipping build step... +23: Loading extension module utils... +23: No modifications detected for re-loaded extension module utils, skipping build step... +23: Loading extension module utils... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... +23: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +23: +23: +23: Loading extension module utils...Loading extension module utils... +23: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: +23: +23: Loading extension module utils... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... + 1: No modifications detected for re-loaded extension module utils, skipping build step... + 1: Loading extension module utils... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +11: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +11: +11: Loading extension module utils... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... +11: Loading extension module utils... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 7: + 7: Loading extension module utils...Loading extension module utils... + 7: +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... + 1: No modifications detected for re-loaded extension module utils, skipping build step... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Loading extension module utils... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: No modifications detected for re-loaded extension module utils, skipping build step... +27: Loading extension module utils... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: +20: +20: +20: +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +20: +20: Loading extension module utils... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +20: +20: Loading extension module utils... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +27: No modifications detected for re-loaded extension module utils, skipping build step... +27: Loading extension module utils... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: No modifications detected for re-loaded extension module utils, skipping build step... +25: Loading extension module utils... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: No modifications detected for re-loaded extension module utils, skipping build step... + 2: Loading extension module utils... + 2: No modifications detected for re-loaded extension module utils, skipping build step... + 2: Loading extension module utils... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Building extension module utils... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Loading extension module utils... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... + 7: Building extension module utils... + 7: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) + 1: Loading extension module utils... + 4: Loading extension module utils... + 6: Loading extension module utils... + 2: Loading extension module utils... + 3: Loading extension module utils... + 5: Loading extension module utils... + 7: Loading extension module utils... + 0: Loading extension module utils... + 1: Loading extension module utils... + 4: Loading extension module utils... + 6: Loading extension module utils... + 2: Loading extension module utils... + 3: Loading extension module utils... + 5: Loading extension module utils... + 7: Loading extension module utils... + 1: Loading extension module utils... + 2: Loading extension module utils... + 3: Loading extension module utils... + 7: Loading extension module utils... + 0: Loading extension module utils... + 2: Loading extension module utils... + 3: Loading extension module utils... + 1: Loading extension module utils... + 4: Loading extension module utils... + 4: Loading extension module utils... + 5: Loading extension module utils... + 5: Loading extension module utils... + 6: Loading extension module utils... + 6: Loading extension module utils... + 7: Loading extension module utils... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: No modifications detected for re-loaded extension module utils, skipping build step... + 2: Loading extension module utils... + 2: No modifications detected for re-loaded extension module utils, skipping build step... + 2: Loading extension module utils... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 2: + 2: Loading extension module utils...Loading extension module utils... + 2: + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: No modifications detected for re-loaded extension module utils, skipping build step... + 1: Loading extension module utils... + 1: No modifications detected for re-loaded extension module utils, skipping build step... + 1: Loading extension module utils... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: No modifications detected for re-loaded extension module utils, skipping build step... + 7: Loading extension module utils... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: No modifications detected for re-loaded extension module utils, skipping build step... + 1: Loading extension module utils... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: No modifications detected for re-loaded extension module utils, skipping build step... + 1: Loading extension module utils... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: No modifications detected for re-loaded extension module utils, skipping build step... + 7: Loading extension module utils... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: No modifications detected for re-loaded extension module utils, skipping build step... + 7: Loading extension module utils... + 0: Loading extension module utils... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: No modifications detected for re-loaded extension module utils, skipping build step... + 7: Loading extension module utils... + 0: Loading extension module utils... + 2: Loading extension module utils... + 5: Loading extension module utils...Loading extension module utils... + 5: + 2: Loading extension module utils... + 6: Loading extension module utils... + 3: Loading extension module utils...Loading extension module utils... + 3: + 6: Loading extension module utils... + 4: Loading extension module utils... + 4: Loading extension module utils... + 1: Loading extension module utils...Loading extension module utils... + 1: + 7: Loading extension module utils... + 7: Loading extension module utils... +24: Loading extension module utils... +24: Loading extension module utils... +25: Loading extension module utils... +25: Loading extension module utils... +26: Loading extension module utils... +26: Loading extension module utils... +24: Loading extension module utils... +25: Loading extension module utils... +29: Loading extension module utils... +28: Loading extension module utils... +27: Loading extension module utils... +16: Loading extension module utils... +18: Loading extension module utils... +28: Loading extension module utils... +27: Loading extension module utils... +30: Loading extension module utils... +17: Loading extension module utils... +16: Loading extension module utils... +17: Loading extension module utils... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Loading extension module utils... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... +28: Loading extension module utils... +28: Loading extension module utils... +26: Loading extension module utils... +26: Loading extension module utils... +25: Loading extension module utils... +25: Loading extension module utils... +28: Loading extension module utils... +26: Loading extension module utils... +31: Loading extension module utils... +24: Loading extension module utils... +28: Loading extension module utils... +30: Loading extension module utils... +29: Loading extension module utils... +31: Loading extension module utils... +26: Loading extension module utils... +29: Loading extension module utils... +30: Loading extension module utils... +27: Loading extension module utils... +27: Loading extension module utils... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... +27: Loading extension module utils... +31: Loading extension module utils... +31: Loading extension module utils... +31: Loading extension module utils... +27: Loading extension module utils... +18: Loading extension module utils... +31: Loading extension module utils... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... + 2: No modifications detected for re-loaded extension module utils, skipping build step... + 2: Loading extension module utils... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: No modifications detected for re-loaded extension module utils, skipping build step... + 2: Loading extension module utils... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... +16: Loading extension module utils... +29: Loading extension module utils... +20: Loading extension module utils... +29: Loading extension module utils... +16: Loading extension module utils... +20: Loading extension module utils... +21: Loading extension module utils... +21: Loading extension module utils... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...No modifications detected for re-loaded extension module utils, skipping build step... + 1: + 1: Loading extension module utils... +23: Loading extension module utils... + 6: Loading extension module utils... + 1: No modifications detected for re-loaded extension module utils, skipping build step... + 1: Loading extension module utils... + 7: No modifications detected for re-loaded extension module utils, skipping build step... + 7: Loading extension module utils... +23: Loading extension module utils... + 6: Loading extension module utils... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: No modifications detected for re-loaded extension module utils, skipping build step... +25: Loading extension module utils... +17: Loading extension module utils... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Loading extension module utils... + 7: No modifications detected for re-loaded extension module utils, skipping build step... + 7: Loading extension module utils... +22: Loading extension module utils... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +22: Loading extension module utils... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: No modifications detected for re-loaded extension module utils, skipping build step... +25: Loading extension module utils... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Loading extension module utils... +30: Loading extension module utils... +24: Loading extension module utils... +24: Loading extension module utils... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: No modifications detected for re-loaded extension module utils, skipping build step... +25: Loading extension module utils... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: No modifications detected for re-loaded extension module utils, skipping build step... +25: Loading extension module utils... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +25: No modifications detected for re-loaded extension module utils, skipping build step... +25: Loading extension module utils... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +26: +26: Loading extension module utils... +26: Loading extension module utils... +16: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +16: +16: Loading extension module utils...Loading extension module utils... +16: +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...No modifications detected for re-loaded extension module utils, skipping build step... +31: +31: Loading extension module utils... +31: No modifications detected for re-loaded extension module utils, skipping build step... +31: Loading extension module utils... +31: No modifications detected for re-loaded extension module utils, skipping build step... +31: Loading extension module utils... +31: No modifications detected for re-loaded extension module utils, skipping build step... +31: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +31: +31: Loading extension module utils... +31: No modifications detected for re-loaded extension module utils, skipping build step... +31: Loading extension module utils... + 0: Loading extension module utils... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: No modifications detected for re-loaded extension module utils, skipping build step... +16: Loading extension module utils... + 0: Loading extension module utils... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: No modifications detected for re-loaded extension module utils, skipping build step... +16: Loading extension module utils... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: No modifications detected for re-loaded extension module utils, skipping build step... + 6: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... + 6: + 6: Loading extension module utils... + 6: No modifications detected for re-loaded extension module utils, skipping build step... + 6: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... + 6: + 6: Loading extension module utils... + 6: No modifications detected for re-loaded extension module utils, skipping build step... + 6: Loading extension module utils... + 6: No modifications detected for re-loaded extension module utils, skipping build step... + 6: Loading extension module utils... + 6: No modifications detected for re-loaded extension module utils, skipping build step... + 6: Loading extension module utils... + 6: No modifications detected for re-loaded extension module utils, skipping build step... + 6: Loading extension module utils... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +28: +28: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +28: +28: Loading extension module utils... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: Loading extension module utils... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: Loading extension module utils... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: Loading extension module utils... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: No modifications detected for re-loaded extension module utils, skipping build step... +27: Loading extension module utils... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: No modifications detected for re-loaded extension module utils, skipping build step... +27: Loading extension module utils... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: No modifications detected for re-loaded extension module utils, skipping build step... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Loading extension module utils... +27: +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: No modifications detected for re-loaded extension module utils, skipping build step... +27: Loading extension module utils... +27: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +27: +27: Loading extension module utils...Loading extension module utils... +27: +18: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +18: +18: Loading extension module utils... +18: Loading extension module utils... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +23: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +23: +23: Loading extension module utils...Loading extension module utils... +23: +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings + 0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/8b7178b13b/3583606.out b/8b7178b13b/3583606.out new file mode 100644 index 0000000000000000000000000000000000000000..e35415b13e4e23d2b21b6ba593751fa536521034 --- /dev/null +++ b/8b7178b13b/3583606.out @@ -0,0 +1,14937 @@ +Model parameters: d_model 4096 ffw_size 16384 kv_size 128 n_heads 32 n_layers 42 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 4 --pipeline-model-parallel-size 4 --num-layers 42 --hidden-size 4096 --num-attention-heads 32 --kv-channels 128 --ffn-hidden-size 16384 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 1 --global-batch-size 512 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --clip-grad 1.0 --kill-switch-path kill-switch-8b7178b13bval --bf16 --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 --override-lr-scheduler --reset-progress --no-load-optim --log-interval 10 --save-interval 5000 --eval-interval 1 --eval-iters 100 --eval-only true --tensorboard-dir tensorboard_8b7178b13bval --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save lm1-8b7-178b-c4-repetitions/8b7178b13b --load lm1-8b7-178b-c4-repetitions/8b7178b13b --train-weighted-split-paths-path train400m.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --num-workers 0 --valid-num-workers 0 --deepspeed --deepspeed_config ds_configs/3583606.json --zero-stage 0 +START 3583606: Thu 25 May 2023 01:34:18 PM EEST + 0: + 0: + 0: ======================= ROCm System Management Interface ======================= + 0: ================================= Concise Info ================================= + 0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 0: 0 45.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: 2 43.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: 4 44.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: 6 35.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: ================================================================================ + 0: ============================= End of ROCm SMI Log ============================== +12: +12: +12: ======================= ROCm System Management Interface ======================= +12: ================================= Concise Info ================================= +12: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +12: 0 47.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: 2 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: 4 45.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: 6 41.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: ================================================================================ +12: ============================= End of ROCm SMI Log ============================== + 4: + 4: + 4: ======================= ROCm System Management Interface ======================= + 4: ================================= Concise Info ================================= + 4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 4: 0 41.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: 2 41.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: 4 39.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: 6 41.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: ================================================================================ + 4: ============================= End of ROCm SMI Log ============================== +13: +13: +13: ======================= ROCm System Management Interface ======================= +13: ================================= Concise Info ================================= +13: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +13: 0 45.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: 2 42.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: 4 46.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: 6 39.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 7 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: ================================================================================ +13: ============================= End of ROCm SMI Log ============================== +16: +16: +16: ======================= ROCm System Management Interface ======================= +16: ================================= Concise Info ================================= +16: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +16: 0 41.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: 2 38.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: 4 42.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: 6 35.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: ================================================================================ +16: ============================= End of ROCm SMI Log ============================== + 8: + 8: + 8: ======================= ROCm System Management Interface ======================= + 8: ================================= Concise Info ================================= + 8: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 8: 0 46.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: 2 41.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: 4 43.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: 6 36.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: ================================================================================ + 8: ============================= End of ROCm SMI Log ============================== +18: +18: +18: ======================= ROCm System Management Interface ======================= +18: ================================= Concise Info ================================= +18: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +18: 0 48.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: 2 42.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: 4 44.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: 6 41.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: ================================================================================ +18: ============================= End of ROCm SMI Log ============================== +26: +26: +26: ======================= ROCm System Management Interface ======================= +26: ================================= Concise Info ================================= +26: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +26: 0 43.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: 2 43.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: 4 44.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: 6 36.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: ================================================================================ +26: ============================= End of ROCm SMI Log ============================== +19: +19: +19: ======================= ROCm System Management Interface ======================= +19: ================================= Concise Info ================================= +19: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +19: 0 45.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: 2 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: 4 41.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: 6 40.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: ================================================================================ +19: ============================= End of ROCm SMI Log ============================== +31: +31: +31: ======================= ROCm System Management Interface ======================= +31: ================================= Concise Info ================================= +31: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +31: 0 44.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: 2 39.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: 4 43.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: 6 32.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: ================================================================================ +31: ============================= End of ROCm SMI Log ============================== +20: +20: +20: ======================= ROCm System Management Interface ======================= +20: ================================= Concise Info ================================= +20: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +20: 0 43.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 1 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: 2 40.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: 4 40.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: 6 35.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: ================================================================================ +20: ============================= End of ROCm SMI Log ============================== +27: +27: +27: ======================= ROCm System Management Interface ======================= +27: ================================= Concise Info ================================= +27: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +27: 0 42.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: 2 38.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: 4 40.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: 6 36.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: ================================================================================ +27: ============================= End of ROCm SMI Log ============================== + 1: + 1: + 1: ======================= ROCm System Management Interface ======================= + 1: ================================= Concise Info ================================= + 1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 1: 0 47.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: 2 44.0c 80.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: 4 40.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: 6 45.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: ================================================================================ + 1: ============================= End of ROCm SMI Log ============================== + 6: + 6: + 6: ======================= ROCm System Management Interface ======================= + 6: ================================= Concise Info ================================= + 6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 6: 0 47.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 1 51.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: 2 43.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 3 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: 4 43.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 5 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: 6 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: ================================================================================ + 6: ============================= End of ROCm SMI Log ============================== +15: +15: +15: ======================= ROCm System Management Interface ======================= +15: ================================= Concise Info ================================= +15: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +15: 0 40.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: 2 43.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: 4 40.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: 6 43.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: ================================================================================ +15: ============================= End of ROCm SMI Log ============================== +10: +10: +10: ======================= ROCm System Management Interface ======================= +10: ================================= Concise Info ================================= +10: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +10: 0 46.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: 2 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: 4 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: 6 40.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: ================================================================================ +10: ============================= End of ROCm SMI Log ============================== +11: +11: +11: ======================= ROCm System Management Interface ======================= +11: ================================= Concise Info ================================= +11: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +11: 0 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 1 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: 2 37.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: 4 42.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 5 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: 6 43.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: ================================================================================ +11: ============================= End of ROCm SMI Log ============================== + 9: + 9: + 9: ======================= ROCm System Management Interface ======================= + 9: ================================= Concise Info ================================= + 9: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 9: 0 42.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: 2 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: 4 39.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 5 54.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: 6 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: ================================================================================ + 9: ============================= End of ROCm SMI Log ============================== + 5: + 5: + 5: ======================= ROCm System Management Interface ======================= + 5: ================================= Concise Info ================================= + 5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 5: 0 50.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: 2 44.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: 4 43.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: 6 42.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 7 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: ================================================================================ + 5: ============================= End of ROCm SMI Log ============================== +17: +17: +17: ======================= ROCm System Management Interface ======================= +17: ================================= Concise Info ================================= +17: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +17: 0 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: 2 41.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: 4 45.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 5 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: 6 41.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: ================================================================================ +17: ============================= End of ROCm SMI Log ============================== +24: +24: +24: ======================= ROCm System Management Interface ======================= +24: ================================= Concise Info ================================= +24: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +24: 0 47.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: 2 41.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: 4 51.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: 6 41.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: ================================================================================ +24: ============================= End of ROCm SMI Log ============================== +14: +14: +14: ======================= ROCm System Management Interface ======================= +14: ================================= Concise Info ================================= +14: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +14: 0 45.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: 2 46.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: 4 44.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 5 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: 6 40.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: ================================================================================ +14: ============================= End of ROCm SMI Log ============================== +28: +28: +28: ======================= ROCm System Management Interface ======================= +28: ================================= Concise Info ================================= +28: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +28: 0 47.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 1 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: 2 41.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: 4 38.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 5 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: 6 36.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 7 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: ================================================================================ +28: ============================= End of ROCm SMI Log ============================== +25: +25: +25: ======================= ROCm System Management Interface ======================= +25: ================================= Concise Info ================================= +25: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +25: 0 43.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: 2 37.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: 4 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: 6 40.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: ================================================================================ +25: ============================= End of ROCm SMI Log ============================== + 2: + 2: + 2: ======================= ROCm System Management Interface ======================= + 2: ================================= Concise Info ================================= + 2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 2: 0 47.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 1 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: 2 46.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: 4 42.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: 6 47.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: ================================================================================ + 2: ============================= End of ROCm SMI Log ============================== +29: +29: +29: ======================= ROCm System Management Interface ======================= +29: ================================= Concise Info ================================= +29: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +29: 0 44.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: 2 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: 4 41.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: 6 47.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: ================================================================================ +29: ============================= End of ROCm SMI Log ============================== +23: +23: +23: ======================= ROCm System Management Interface ======================= +23: ================================= Concise Info ================================= +23: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +23: 0 45.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: 2 42.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: 4 36.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: 6 45.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: ================================================================================ +23: ============================= End of ROCm SMI Log ============================== +21: +21: +21: ======================= ROCm System Management Interface ======================= +21: ================================= Concise Info ================================= +21: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +21: 0 41.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 1 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: 2 41.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 3 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: 4 41.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 5 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: 6 40.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: ================================================================================ +21: ============================= End of ROCm SMI Log ============================== + 7: + 7: + 7: ======================= ROCm System Management Interface ======================= + 7: ================================= Concise Info ================================= + 7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 7: 0 43.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: 2 40.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: 4 44.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 5 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: 6 35.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 7 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: ================================================================================ + 7: ============================= End of ROCm SMI Log ============================== + 3: + 3: + 3: ======================= ROCm System Management Interface ======================= + 3: ================================= Concise Info ================================= + 3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 3: 0 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: 2 40.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: 4 37.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: 6 39.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: ================================================================================ + 3: ============================= End of ROCm SMI Log ============================== +30: +30: +30: ======================= ROCm System Management Interface ======================= +30: ================================= Concise Info ================================= +30: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +30: 0 51.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: 2 38.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: 4 48.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: 6 38.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: ================================================================================ +30: ============================= End of ROCm SMI Log ============================== +22: +22: +22: ======================= ROCm System Management Interface ======================= +22: ================================= Concise Info ================================= +22: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +22: 0 46.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: 2 40.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 3 37.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: 4 41.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: 6 43.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: ================================================================================ +22: ============================= End of ROCm SMI Log ============================== +31: Launching on nid006582 (31/32), master nid006551 port 9999, GPUs 8, CUDA: True + 6: Launching on nid006557 (6/32), master nid006551 port 9999, GPUs 8, CUDA: True + 9: Launching on nid006560 (9/32), master nid006551 port 9999, GPUs 8, CUDA: True + 2: Launching on nid006553 (2/32), master nid006551 port 9999, GPUs 8, CUDA: True +15: Launching on nid006566 (15/32), master nid006551 port 9999, GPUs 8, CUDA: True +20: Launching on nid006571 (20/32), master nid006551 port 9999, GPUs 8, CUDA: True +17: Launching on nid006568 (17/32), master nid006551 port 9999, GPUs 8, CUDA: True + 5: Launching on nid006556 (5/32), master nid006551 port 9999, GPUs 8, CUDA: True +12: Launching on nid006563 (12/32), master nid006551 port 9999, GPUs 8, CUDA: True +13: Launching on nid006564 (13/32), master nid006551 port 9999, GPUs 8, CUDA: True +11: Launching on nid006562 (11/32), master nid006551 port 9999, GPUs 8, CUDA: True +21: Launching on nid006572 (21/32), master nid006551 port 9999, GPUs 8, CUDA: True + 0: Launching on nid006551 (0/32), master nid006551 port 9999, GPUs 8, CUDA: True +10: Launching on nid006561 (10/32), master nid006551 port 9999, GPUs 8, CUDA: True + 1: Launching on nid006552 (1/32), master nid006551 port 9999, GPUs 8, CUDA: True +18: Launching on nid006569 (18/32), master nid006551 port 9999, GPUs 8, CUDA: True + 4: Launching on nid006555 (4/32), master nid006551 port 9999, GPUs 8, CUDA: True +28: Launching on nid006579 (28/32), master nid006551 port 9999, GPUs 8, CUDA: True + 8: Launching on nid006559 (8/32), master nid006551 port 9999, GPUs 8, CUDA: True +24: Launching on nid006575 (24/32), master nid006551 port 9999, GPUs 8, CUDA: True +25: Launching on nid006576 (25/32), master nid006551 port 9999, GPUs 8, CUDA: True +27: Launching on nid006578 (27/32), master nid006551 port 9999, GPUs 8, CUDA: True +16: Launching on nid006567 (16/32), master nid006551 port 9999, GPUs 8, CUDA: True +26: Launching on nid006577 (26/32), master nid006551 port 9999, GPUs 8, CUDA: True +19: Launching on nid006570 (19/32), master nid006551 port 9999, GPUs 8, CUDA: True +23: Launching on nid006574 (23/32), master nid006551 port 9999, GPUs 8, CUDA: True +30: Launching on nid006581 (30/32), master nid006551 port 9999, GPUs 8, CUDA: True +14: Launching on nid006565 (14/32), master nid006551 port 9999, GPUs 8, CUDA: True +22: Launching on nid006573 (22/32), master nid006551 port 9999, GPUs 8, CUDA: True +29: Launching on nid006580 (29/32), master nid006551 port 9999, GPUs 8, CUDA: True + 7: Launching on nid006558 (7/32), master nid006551 port 9999, GPUs 8, CUDA: True + 3: Launching on nid006554 (3/32), master nid006551 port 9999, GPUs 8, CUDA: True + 0: using world size: 256, data-parallel-size: 16, tensor-model-parallel size: 4, pipeline-model-parallel size: 4 + 0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. + 0: using torch.bfloat16 for parameters ... + 0: ------------------------ arguments ------------------------ + 0: abort_on_unmet_fused_kernel_constraints ......... False + 0: accumulate_allreduce_grads_in_fp32 .............. True + 0: adam_beta1 ...................................... 0.9 + 0: adam_beta2 ...................................... 0.999 + 0: adam_eps ........................................ 1e-08 + 0: adlr_autoresume ................................. False + 0: adlr_autoresume_interval ........................ 1000 + 0: apply_query_key_layer_scaling ................... True + 0: apply_residual_connection_post_layernorm ........ False + 0: attention_dropout ............................... 0.1 + 0: attention_softmax_in_fp32 ....................... False + 0: bert_binary_head ................................ True + 0: bert_load ....................................... None + 0: bf16 ............................................ True + 0: bias_dropout_fusion ............................. True + 0: bias_gelu_fusion ................................ True + 0: biencoder_projection_dim ........................ 0 + 0: biencoder_shared_query_context_model ............ False + 0: block_data_path ................................. None + 0: checkpoint_activations .......................... False + 0: checkpoint_in_cpu ............................... False + 0: checkpoint_num_layers ........................... 1 + 0: clip_grad ....................................... 1.0 + 0: codecarbon_dir .................................. None + 0: consumed_train_samples .......................... 0 + 0: consumed_train_tokens ........................... 0 + 0: consumed_valid_samples .......................... 0 + 0: contigious_checkpointing ........................ False + 0: cpu_optimizer ................................... False + 0: cpu_torch_adam .................................. False + 0: curriculum_learning ............................. False + 0: data_impl ....................................... mmap + 0: data_parallel_size .............................. 16 + 0: data_path ....................................... None + 0: dataloader_type ................................. single + 0: DDP_impl ........................................ local + 0: decoder_seq_length .............................. None + 0: deepscale ....................................... False + 0: deepscale_config ................................ None + 0: deepspeed ....................................... True + 0: deepspeed_activation_checkpointing .............. False + 0: deepspeed_config ................................ ds_configs/3583606.json + 0: deepspeed_mpi ................................... False + 0: distribute_checkpointed_activations ............. False + 0: distributed_backend ............................. nccl + 0: embed_layernorm ................................. False + 0: embedding_path .................................. None + 0: encoder_seq_length .............................. 2048 + 0: eod_mask_loss ................................... False + 0: eval_interval ................................... 1 + 0: eval_iters ...................................... 100 + 0: eval_only ....................................... True + 0: evidence_data_path .............................. None + 0: exit_duration_in_mins ........................... None + 0: exit_interval ................................... None + 0: ffn_hidden_size ................................. 16384 + 0: finetune ........................................ False + 0: fp16 ............................................ False + 0: fp16_lm_cross_entropy ........................... False + 0: fp32_residual_connection ........................ False + 0: gigaflos_no_embeds .............................. 0 + 0: global_batch_size ............................... 512 + 0: glu_activation .................................. None + 0: hidden_dropout .................................. 0.1 + 0: hidden_size ..................................... 4096 + 0: hysteresis ...................................... 2 + 0: ict_head_size ................................... None + 0: ict_load ........................................ None + 0: img_dim ......................................... 224 + 0: indexer_batch_size .............................. 128 + 0: indexer_log_interval ............................ 1000 + 0: inference ....................................... False + 0: init_method_std ................................. 0.02 + 0: init_method_xavier_uniform ...................... False + 0: initial_loss_scale .............................. 4294967296 + 0: kill_switch_path ................................ kill-switch-8b7178b13bval + 0: kv_channels ..................................... 128 + 0: layer_norm_fusion ............................... True + 0: layernorm_epsilon ............................... 1e-05 + 0: lazy_mpu_init ................................... None + 0: load ............................................ lm1-8b7-178b-c4-repetitions/8b7178b13b + 0: local_rank ...................................... None + 0: log_batch_size_to_tensorboard ................... True + 0: log_interval .................................... 10 + 0: log_learning_rate_to_tensorboard ................ True + 0: log_level ....................................... None + 0: log_level_replica ............................... None + 0: log_loss_scale_to_tensorboard ................... True + 0: log_num_zeros_in_grad ........................... False + 0: log_params_norm ................................. False + 0: log_path ........................................ None + 0: log_timers_to_tensorboard ....................... True + 0: log_validation_ppl_to_tensorboard ............... True + 0: loss_on_targets_only ............................ False + 0: loss_scale ...................................... None + 0: loss_scale_window ............................... 1000 + 0: lr .............................................. 0.0002 + 0: lr_decay_iters .................................. None + 0: lr_decay_samples ................................ 1 + 0: lr_decay_style .................................. cosine + 0: lr_decay_tokens ................................. None + 0: lr_warmup_fraction .............................. None + 0: lr_warmup_iters ................................. 0 + 0: lr_warmup_samples ............................... 0 + 0: make_vocab_size_divisible_by .................... 128 + 0: mask_prob ....................................... 0.15 + 0: masked_softmax_fusion ........................... True + 0: max_position_embeddings ......................... 2048 + 0: mean_noise_span_length .......................... None + 0: memory_centric_tiled_linear ..................... False + 0: merge_file ...................................... gpt2/merges.txt + 0: micro_batch_size ................................ 1 + 0: min_loss_scale .................................. 1.0 + 0: min_lr .......................................... 2e-05 + 0: mmap_warmup ..................................... False + 0: no_load_optim ................................... True + 0: no_load_rng ..................................... None + 0: no_save_optim ................................... None + 0: no_save_rng ..................................... None + 0: noise_density ................................... None + 0: num_attention_heads ............................. 32 + 0: num_channels .................................... 3 + 0: num_classes ..................................... 1000 + 0: num_layers ...................................... 42 + 0: num_layers_per_virtual_pipeline_stage ........... None + 0: num_workers ..................................... 0 + 0: onnx_safe ....................................... None + 0: openai_gelu ..................................... False + 0: optimizer ....................................... adam + 0: optimizer_fusion ................................ True + 0: override_lr_scheduler ........................... True + 0: pad_vocab_size_to ............................... None + 0: params_dtype .................................... torch.bfloat16 + 0: partition_activations ........................... False + 0: patch_dim ....................................... 16 + 0: pipeline_model_parallel_size .................... 4 + 0: position_embedding_type ......................... PositionEmbeddingType.absolute + 0: pp_partition_method ............................. None + 0: profile_backward ................................ False + 0: query_in_block_prob ............................. 0.1 + 0: rampup_batch_size ............................... None + 0: rank ............................................ 0 + 0: remote_device ................................... none + 0: reset_attention_mask ............................ False + 0: reset_position_ids .............................. False + 0: reset_progress .................................. True + 0: retriever_report_topk_accuracies ................ [] + 0: retriever_score_scaling ......................... False + 0: retriever_seq_length ............................ 256 + 0: reweight_loss_based_on_position_frequency ....... False + 0: sample_rate ..................................... 1.0 + 0: save ............................................ lm1-8b7-178b-c4-repetitions/8b7178b13b + 0: save_interval ................................... 5000 + 0: scatter_gather_tensors_in_pipeline .............. True + 0: scattered_embeddings ............................ False + 0: seed ............................................ 1234 + 0: seq_length ...................................... 2048 + 0: sgd_momentum .................................... 0.9 + 0: short_seq_prob .................................. 0.1 + 0: skip_train_iteration_range ...................... None + 0: split ........................................... None + 0: split_transformers .............................. False + 0: sync_tp_duplicated_parameters ................... False + 0: synchronize_each_layer .......................... False + 0: tensor_model_parallel_size ...................... 4 + 0: tensorboard_dir ................................. tensorboard_8b7178b13bval + 0: tensorboard_log_interval ........................ 1 + 0: tensorboard_queue_size .......................... 5 + 0: test_weighted_split_paths ....................... None + 0: test_weighted_split_paths_path .................. None + 0: tile_factor ..................................... 1 + 0: titles_data_path ................................ None + 0: tokenizer_name_or_path .......................... None + 0: tokenizer_type .................................. GPT2BPETokenizer + 0: train_iters ..................................... None + 0: train_samples ................................... 1 + 0: train_tokens .................................... None + 0: train_weighted_split_names ...................... ['train'] + 0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document']] + 0: train_weighted_split_paths_path ................. None + 0: train_weighted_split_splits ..................... [['0:1']] + 0: train_weighted_split_weights .................... [['1.0']] + 0: universal_checkpoint ............................ False + 0: use_bnb_optimizer ............................... False + 0: use_checkpoint_lr_scheduler ..................... False + 0: use_contiguous_buffers_in_ddp ................... True + 0: use_cpu_initialization .......................... None + 0: use_one_sent_docs ............................... False + 0: use_pin_memory .................................. False + 0: valid_num_workers ............................... 0 + 0: valid_weighted_split_names ...................... ['validation'] + 0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] + 0: valid_weighted_split_paths_path ................. None + 0: valid_weighted_split_splits ..................... [['0:1']] + 0: valid_weighted_split_weights .................... [['1.0']] + 0: virtual_pipeline_model_parallel_size ............ None + 0: vocab_extra_ids ................................. 0 + 0: vocab_file ...................................... gpt2/vocab.json + 0: weight_decay .................................... 0.1 + 0: world_size ...................................... 256 + 0: zero_allgather_bucket_size ...................... 0.0 + 0: zero_contigious_gradients ....................... False + 0: zero_reduce_bucket_size ......................... 0.0 + 0: zero_reduce_scatter ............................. False + 0: zero_stage ...................................... 0 + 0: -------------------- end of arguments --------------------- + 0: setting number of micro-batches to constant 32 + 0: > building GPT2BPETokenizer tokenizer ... + 0: > padded vocab (size: 50257) with 431 dummy tokens (new size: 50688) + 0: DeepSpeed general environment info: + 0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] + 0: torch version .................... 1.13.0+rocm5.2 + 0: torch cuda version ............... None + 0: torch hip version ................ 5.2.21151-afdc89f8 + 0: nvcc version ..................... None + 0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] + 0: deepspeed info ................... 0.7.5, unknown, unknown + 0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 + 0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** + 0: > initializing torch distributed ... + 0: [2023-05-25 13:37:21,573] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +31: > setting tensorboard ... + 0: > initializing tensor model parallel with size 4 + 0: > initializing pipeline model parallel with size 4 + 0: > setting random seeds to 1234 ... + 0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 + 0: > compiling dataset index builder ... + 0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' + 0: make: Nothing to be done for 'default'. + 0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' + 0: >>> done with dataset index builder. Compilation time: 0.109 seconds + 0: > compiling and loading fused kernels ... + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.cpp [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.hip [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] + 0: Total number of unsupported CUDA function calls: 0 + 0: + 0: + 0: Total number of replaced kernel launches: 87 + 0: [1/1] c++ scaled_upper_triang_masked_softmax_hip.o scaled_upper_triang_masked_softmax_hip.cuda.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/opt/rocm/lib -lamdhip64 -o scaled_upper_triang_masked_softmax_cuda.so + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] + 0: Total number of unsupported CUDA function calls: 0 + 0: + 0: + 0: Total number of replaced kernel launches: 63 + 0: ninja: no work to do. + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] + 0: Total number of unsupported CUDA function calls: 0 + 0: + 0: + 0: Total number of replaced kernel launches: 67 + 0: [1/1] c++ layer_norm_hip_kernel.cuda.o layer_norm_cuda.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/opt/rocm/lib -lamdhip64 -o fused_mix_prec_layer_norm_cuda.so + 0: >>> done with compiling and loading fused kernels. Compilation time: 27.208 seconds + 0: time to initialize megatron (seconds): -0.166 + 0: [after megatron is initialized] datetime: 2023-05-25 13:37:51 + 0: building GPT model ... + 0: [2023-05-25 13:37:51,985] [INFO] [utils.py:827:see_memory_usage] Before Building Model + 0: [2023-05-25 13:37:51,986] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB + 0: [2023-05-25 13:37:51,986] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.87 GB, percent = 8.1% + 0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None + 0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=0, model=1): 1, ProcessCoord(pipe=0, data=0, model=2): 2, ProcessCoord(pipe=0, data=0, model=3): 3, ProcessCoord(pipe=0, data=1, model=0): 4, ProcessCoord(pipe=0, data=1, model=1): 5, ProcessCoord(pipe=0, data=1, model=2): 6, ProcessCoord(pipe=0, data=1, model=3): 7, ProcessCoord(pipe=0, data=2, model=0): 8, ProcessCoord(pipe=0, data=2, model=1): 9, ProcessCoord(pipe=0, data=2, model=2): 10, ProcessCoord(pipe=0, data=2, model=3): 11, ProcessCoord(pipe=0, data=3, model=0): 12, ProcessCoord(pipe=0, data=3, model=1): 13, ProcessCoord(pipe=0, data=3, model=2): 14, ProcessCoord(pipe=0, data=3, model=3): 15, ProcessCoord(pipe=0, data=4, model=0): 16, ProcessCoord(pipe=0, data=4, model=1): 17, ProcessCoord(pipe=0, data=4, model=2): 18, ProcessCoord(pipe=0, data=4, model=3): 19, ProcessCoord(pipe=0, data=5, model=0): 20, ProcessCoord(pipe=0, data=5, model=1): 21, ProcessCoord(pipe=0, data=5, model=2): 22, ProcessCoord(pipe=0, data=5, + 0: model=3): 23, ProcessCoord(pipe=0, data=6, model=0): 24, ProcessCoord(pipe=0, data=6, model=1): 25, ProcessCoord(pipe=0, data=6, model=2): 26, ProcessCoord(pipe=0, data=6, model=3): 27, ProcessCoord(pipe=0, data=7, model=0): 28, ProcessCoord(pipe=0, data=7, model=1): 29, ProcessCoord(pipe=0, data=7, model=2): 30, ProcessCoord(pipe=0, data=7, model=3): 31, ProcessCoord(pipe=0, data=8, model=0): 32, ProcessCoord(pipe=0, data=8, model=1): 33, ProcessCoord(pipe=0, data=8, model=2): 34, ProcessCoord(pipe=0, data=8, model=3): 35, ProcessCoord(pipe=0, data=9, model=0): 36, ProcessCoord(pipe=0, data=9, model=1): 37, ProcessCoord(pipe=0, data=9, model=2): 38, ProcessCoord(pipe=0, data=9, model=3): 39, ProcessCoord(pipe=0, data=10, model=0): 40, ProcessCoord(pipe=0, data=10, model=1): 41, ProcessCoord(pipe=0, data=10, model=2): 42, ProcessCoord(pipe=0, data=10, model=3): 43, ProcessCoord(pipe=0, data=11, model=0): 44, ProcessCoord(pipe=0, data=11, model=1): 45, ProcessCoord(pipe=0, data=11, model=2): 46, ProcessCoord( + 0: pipe=0, data=11, model=3): 47, ProcessCoord(pipe=0, data=12, model=0): 48, ProcessCoord(pipe=0, data=12, model=1): 49, ProcessCoord(pipe=0, data=12, model=2): 50, ProcessCoord(pipe=0, data=12, model=3): 51, ProcessCoord(pipe=0, data=13, model=0): 52, ProcessCoord(pipe=0, data=13, model=1): 53, ProcessCoord(pipe=0, data=13, model=2): 54, ProcessCoord(pipe=0, data=13, model=3): 55, ProcessCoord(pipe=0, data=14, model=0): 56, ProcessCoord(pipe=0, data=14, model=1): 57, ProcessCoord(pipe=0, data=14, model=2): 58, ProcessCoord(pipe=0, data=14, model=3): 59, ProcessCoord(pipe=0, data=15, model=0): 60, ProcessCoord(pipe=0, data=15, model=1): 61, ProcessCoord(pipe=0, data=15, model=2): 62, ProcessCoord(pipe=0, data=15, model=3): 63, ProcessCoord(pipe=1, data=0, model=0): 64, ProcessCoord(pipe=1, data=0, model=1): 65, ProcessCoord(pipe=1, data=0, model=2): 66, ProcessCoord(pipe=1, data=0, model=3): 67, ProcessCoord(pipe=1, data=1, model=0): 68, ProcessCoord(pipe=1, data=1, model=1): 69, ProcessCoord(pipe=1, data=1, mo + 0: del=2): 70, ProcessCoord(pipe=1, data=1, model=3): 71, ProcessCoord(pipe=1, data=2, model=0): 72, ProcessCoord(pipe=1, data=2, model=1): 73, ProcessCoord(pipe=1, data=2, model=2): 74, ProcessCoord(pipe=1, data=2, model=3): 75, ProcessCoord(pipe=1, data=3, model=0): 76, ProcessCoord(pipe=1, data=3, model=1): 77, ProcessCoord(pipe=1, data=3, model=2): 78, ProcessCoord(pipe=1, data=3, model=3): 79, ProcessCoord(pipe=1, data=4, model=0): 80, ProcessCoord(pipe=1, data=4, model=1): 81, ProcessCoord(pipe=1, data=4, model=2): 82, ProcessCoord(pipe=1, data=4, model=3): 83, ProcessCoord(pipe=1, data=5, model=0): 84, ProcessCoord(pipe=1, data=5, model=1): 85, ProcessCoord(pipe=1, data=5, model=2): 86, ProcessCoord(pipe=1, data=5, model=3): 87, ProcessCoord(pipe=1, data=6, model=0): 88, ProcessCoord(pipe=1, data=6, model=1): 89, ProcessCoord(pipe=1, data=6, model=2): 90, ProcessCoord(pipe=1, data=6, model=3): 91, ProcessCoord(pipe=1, data=7, model=0): 92, ProcessCoord(pipe=1, data=7, model=1): 93, ProcessCoord(pipe=1, da + 0: ta=7, model=2): 94, ProcessCoord(pipe=1, data=7, model=3): 95, ProcessCoord(pipe=1, data=8, model=0): 96, ProcessCoord(pipe=1, data=8, model=1): 97, ProcessCoord(pipe=1, data=8, model=2): 98, ProcessCoord(pipe=1, data=8, model=3): 99, ProcessCoord(pipe=1, data=9, model=0): 100, ProcessCoord(pipe=1, data=9, model=1): 101, ProcessCoord(pipe=1, data=9, model=2): 102, ProcessCoord(pipe=1, data=9, model=3): 103, ProcessCoord(pipe=1, data=10, model=0): 104, ProcessCoord(pipe=1, data=10, model=1): 105, ProcessCoord(pipe=1, data=10, model=2): 106, ProcessCoord(pipe=1, data=10, model=3): 107, ProcessCoord(pipe=1, data=11, model=0): 108, ProcessCoord(pipe=1, data=11, model=1): 109, ProcessCoord(pipe=1, data=11, model=2): 110, ProcessCoord(pipe=1, data=11, model=3): 111, ProcessCoord(pipe=1, data=12, model=0): 112, ProcessCoord(pipe=1, data=12, model=1): 113, ProcessCoord(pipe=1, data=12, model=2): 114, ProcessCoord(pipe=1, data=12, model=3): 115, ProcessCoord(pipe=1, data=13, model=0): 116, ProcessCoord(pipe=1, data=13 + 0: , model=1): 117, ProcessCoord(pipe=1, data=13, model=2): 118, ProcessCoord(pipe=1, data=13, model=3): 119, ProcessCoord(pipe=1, data=14, model=0): 120, ProcessCoord(pipe=1, data=14, model=1): 121, ProcessCoord(pipe=1, data=14, model=2): 122, ProcessCoord(pipe=1, data=14, model=3): 123, ProcessCoord(pipe=1, data=15, model=0): 124, ProcessCoord(pipe=1, data=15, model=1): 125, ProcessCoord(pipe=1, data=15, model=2): 126, ProcessCoord(pipe=1, data=15, model=3): 127, ProcessCoord(pipe=2, data=0, model=0): 128, ProcessCoord(pipe=2, data=0, model=1): 129, ProcessCoord(pipe=2, data=0, model=2): 130, ProcessCoord(pipe=2, data=0, model=3): 131, ProcessCoord(pipe=2, data=1, model=0): 132, ProcessCoord(pipe=2, data=1, model=1): 133, ProcessCoord(pipe=2, data=1, model=2): 134, ProcessCoord(pipe=2, data=1, model=3): 135, ProcessCoord(pipe=2, data=2, model=0): 136, ProcessCoord(pipe=2, data=2, model=1): 137, ProcessCoord(pipe=2, data=2, model=2): 138, ProcessCoord(pipe=2, data=2, model=3): 139, ProcessCoord(pipe=2, data=3, + 0: model=0): 140, ProcessCoord(pipe=2, data=3, model=1): 141, ProcessCoord(pipe=2, data=3, model=2): 142, ProcessCoord(pipe=2, data=3, model=3): 143, ProcessCoord(pipe=2, data=4, model=0): 144, ProcessCoord(pipe=2, data=4, model=1): 145, ProcessCoord(pipe=2, data=4, model=2): 146, ProcessCoord(pipe=2, data=4, model=3): 147, ProcessCoord(pipe=2, data=5, model=0): 148, ProcessCoord(pipe=2, data=5, model=1): 149, ProcessCoord(pipe=2, data=5, model=2): 150, ProcessCoord(pipe=2, data=5, model=3): 151, ProcessCoord(pipe=2, data=6, model=0): 152, ProcessCoord(pipe=2, data=6, model=1): 153, ProcessCoord(pipe=2, data=6, model=2): 154, ProcessCoord(pipe=2, data=6, model=3): 155, ProcessCoord(pipe=2, data=7, model=0): 156, ProcessCoord(pipe=2, data=7, model=1): 157, ProcessCoord(pipe=2, data=7, model=2): 158, ProcessCoord(pipe=2, data=7, model=3): 159, ProcessCoord(pipe=2, data=8, model=0): 160, ProcessCoord(pipe=2, data=8, model=1): 161, ProcessCoord(pipe=2, data=8, model=2): 162, ProcessCoord(pipe=2, data=8, model=3): 16 + 0: 3, ProcessCoord(pipe=2, data=9, model=0): 164, ProcessCoord(pipe=2, data=9, model=1): 165, ProcessCoord(pipe=2, data=9, model=2): 166, ProcessCoord(pipe=2, data=9, model=3): 167, ProcessCoord(pipe=2, data=10, model=0): 168, ProcessCoord(pipe=2, data=10, model=1): 169, ProcessCoord(pipe=2, data=10, model=2): 170, ProcessCoord(pipe=2, data=10, model=3): 171, ProcessCoord(pipe=2, data=11, model=0): 172, ProcessCoord(pipe=2, data=11, model=1): 173, ProcessCoord(pipe=2, data=11, model=2): 174, ProcessCoord(pipe=2, data=11, model=3): 175, ProcessCoord(pipe=2, data=12, model=0): 176, ProcessCoord(pipe=2, data=12, model=1): 177, ProcessCoord(pipe=2, data=12, model=2): 178, ProcessCoord(pipe=2, data=12, model=3): 179, ProcessCoord(pipe=2, data=13, model=0): 180, ProcessCoord(pipe=2, data=13, model=1): 181, ProcessCoord(pipe=2, data=13, model=2): 182, ProcessCoord(pipe=2, data=13, model=3): 183, ProcessCoord(pipe=2, data=14, model=0): 184, ProcessCoord(pipe=2, data=14, model=1): 185, ProcessCoord(pipe=2, data=14, model + 0: =2): 186, ProcessCoord(pipe=2, data=14, model=3): 187, ProcessCoord(pipe=2, data=15, model=0): 188, ProcessCoord(pipe=2, data=15, model=1): 189, ProcessCoord(pipe=2, data=15, model=2): 190, ProcessCoord(pipe=2, data=15, model=3): 191, ProcessCoord(pipe=3, data=0, model=0): 192, ProcessCoord(pipe=3, data=0, model=1): 193, ProcessCoord(pipe=3, data=0, model=2): 194, ProcessCoord(pipe=3, data=0, model=3): 195, ProcessCoord(pipe=3, data=1, model=0): 196, ProcessCoord(pipe=3, data=1, model=1): 197, ProcessCoord(pipe=3, data=1, model=2): 198, ProcessCoord(pipe=3, data=1, model=3): 199, ProcessCoord(pipe=3, data=2, model=0): 200, ProcessCoord(pipe=3, data=2, model=1): 201, ProcessCoord(pipe=3, data=2, model=2): 202, ProcessCoord(pipe=3, data=2, model=3): 203, ProcessCoord(pipe=3, data=3, model=0): 204, ProcessCoord(pipe=3, data=3, model=1): 205, ProcessCoord(pipe=3, data=3, model=2): 206, ProcessCoord(pipe=3, data=3, model=3): 207, ProcessCoord(pipe=3, data=4, model=0): 208, ProcessCoord(pipe=3, data=4, model=1): 20 + 0: 9, ProcessCoord(pipe=3, data=4, model=2): 210, ProcessCoord(pipe=3, data=4, model=3): 211, ProcessCoord(pipe=3, data=5, model=0): 212, ProcessCoord(pipe=3, data=5, model=1): 213, ProcessCoord(pipe=3, data=5, model=2): 214, ProcessCoord(pipe=3, data=5, model=3): 215, ProcessCoord(pipe=3, data=6, model=0): 216, ProcessCoord(pipe=3, data=6, model=1): 217, ProcessCoord(pipe=3, data=6, model=2): 218, ProcessCoord(pipe=3, data=6, model=3): 219, ProcessCoord(pipe=3, data=7, model=0): 220, ProcessCoord(pipe=3, data=7, model=1): 221, ProcessCoord(pipe=3, data=7, model=2): 222, ProcessCoord(pipe=3, data=7, model=3): 223, ProcessCoord(pipe=3, data=8, model=0): 224, ProcessCoord(pipe=3, data=8, model=1): 225, ProcessCoord(pipe=3, data=8, model=2): 226, ProcessCoord(pipe=3, data=8, model=3): 227, ProcessCoord(pipe=3, data=9, model=0): 228, ProcessCoord(pipe=3, data=9, model=1): 229, ProcessCoord(pipe=3, data=9, model=2): 230, ProcessCoord(pipe=3, data=9, model=3): 231, ProcessCoord(pipe=3, data=10, model=0): 232, ProcessC + 0: oord(pipe=3, data=10, model=1): 233, ProcessCoord(pipe=3, data=10, model=2): 234, ProcessCoord(pipe=3, data=10, model=3): 235, ProcessCoord(pipe=3, data=11, model=0): 236, ProcessCoord(pipe=3, data=11, model=1): 237, ProcessCoord(pipe=3, data=11, model=2): 238, ProcessCoord(pipe=3, data=11, model=3): 239, ProcessCoord(pipe=3, data=12, model=0): 240, ProcessCoord(pipe=3, data=12, model=1): 241, ProcessCoord(pipe=3, data=12, model=2): 242, ProcessCoord(pipe=3, data=12, model=3): 243, ProcessCoord(pipe=3, data=13, model=0): 244, ProcessCoord(pipe=3, data=13, model=1): 245, ProcessCoord(pipe=3, data=13, model=2): 246, ProcessCoord(pipe=3, data=13, model=3): 247, ProcessCoord(pipe=3, data=14, model=0): 248, ProcessCoord(pipe=3, data=14, model=1): 249, ProcessCoord(pipe=3, data=14, model=2): 250, ProcessCoord(pipe=3, data=14, model=3): 251, ProcessCoord(pipe=3, data=15, model=0): 252, ProcessCoord(pipe=3, data=15, model=1): 253, ProcessCoord(pipe=3, data=15, model=2): 254, ProcessCoord(pipe=3, data=15, model=3): 25 + 0: 5} + 0: [2023-05-25 13:37:53,844] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer + 0: stage=0 layers=14 + 0: 0: _to_float16 + 0: 1: EmbeddingPipe + 0: 2: + 0: 3: ParallelTransformerLayerPipe + 0: 4: ParallelTransformerLayerPipe + 0: 5: ParallelTransformerLayerPipe + 0: 6: ParallelTransformerLayerPipe + 0: 7: ParallelTransformerLayerPipe + 0: 8: ParallelTransformerLayerPipe + 0: 9: ParallelTransformerLayerPipe + 0: 10: ParallelTransformerLayerPipe + 0: 11: ParallelTransformerLayerPipe + 0: 12: ParallelTransformerLayerPipe + 0: 13: ParallelTransformerLayerPipe + 0: stage=1 layers=11 + 0: 14: ParallelTransformerLayerPipe + 0: 15: ParallelTransformerLayerPipe + 0: 16: ParallelTransformerLayerPipe + 0: 17: ParallelTransformerLayerPipe + 0: 18: ParallelTransformerLayerPipe + 0: 19: ParallelTransformerLayerPipe + 0: 20: ParallelTransformerLayerPipe + 0: 21: ParallelTransformerLayerPipe + 0: 22: ParallelTransformerLayerPipe + 0: 23: ParallelTransformerLayerPipe + 0: 24: ParallelTransformerLayerPipe + 0: stage=2 layers=11 + 0: 25: ParallelTransformerLayerPipe + 0: 26: ParallelTransformerLayerPipe + 0: 27: ParallelTransformerLayerPipe + 0: 28: ParallelTransformerLayerPipe + 0: 29: ParallelTransformerLayerPipe + 0: 30: ParallelTransformerLayerPipe + 0: 31: ParallelTransformerLayerPipe + 0: 32: ParallelTransformerLayerPipe + 0: 33: ParallelTransformerLayerPipe + 0: 34: ParallelTransformerLayerPipe + 0: 35: ParallelTransformerLayerPipe + 0: stage=3 layers=13 + 0: 36: ParallelTransformerLayerPipe + 0: 37: ParallelTransformerLayerPipe + 0: 38: ParallelTransformerLayerPipe + 0: 39: ParallelTransformerLayerPipe + 0: 40: ParallelTransformerLayerPipe + 0: 41: ParallelTransformerLayerPipe + 0: 42: ParallelTransformerLayerPipe + 0: 43: ParallelTransformerLayerPipe + 0: 44: ParallelTransformerLayerPipe + 0: 45: undo + 0: 46: MixedFusedLayerNorm + 0: 47: EmbeddingPipe + 0: 48: float16_to_fp32 + 0: loss: CrossEntropy + 0: [2023-05-25 13:37:55,402] [INFO] [utils.py:827:see_memory_usage] After Building Model + 0: [2023-05-25 13:37:55,403] [INFO] [utils.py:828:see_memory_usage] MA 1.16 GB Max_MA 1.16 GB CA 1.19 GB Max_CA 1 GB + 0: [2023-05-25 13:37:55,403] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 41.65 GB, percent = 8.3% + 0: setting training iterations to 0 + 0: > learning rate decay style: cosine + 0: DeepSpeed is enabled. + 0: [2023-05-25 13:37:55,405] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +31: ninja: no work to do. +31: Time to load utils op: 0.35482096672058105 seconds +31: Time to load utils op: 0.3555941581726074 seconds +19: Time to load utils op: 0.6255035400390625 seconds +19: Time to load utils op: 0.7619521617889404 seconds +19: Time to load utils op: 0.640009880065918 seconds +19: Time to load utils op: 0.7617371082305908 seconds +19: Time to load utils op: 0.6402287483215332 secondsTime to load utils op: 0.6235151290893555 seconds +19: +23: Time to load utils op: 0.6442480087280273 seconds +23: Time to load utils op: 0.644397497177124 seconds +23: Time to load utils op: 0.6295356750488281 secondsTime to load utils op: 0.6297821998596191 seconds +23: +21: Time to load utils op: 0.6447434425354004 secondsTime to load utils op: 0.6303584575653076 seconds +21: +21: Time to load utils op: 0.6448769569396973 secondsTime to load utils op: 0.6303319931030273 seconds +21: +18: Time to load utils op: 0.6607129573822021 secondsTime to load utils op: 0.650705099105835 seconds +18: +18: Time to load utils op: 0.6307802200317383 seconds +18: Time to load utils op: 0.630800724029541 seconds + 9: Time to load utils op: 0.7523808479309082 seconds + 9: Time to load utils op: 0.7523963451385498 seconds + 9: Time to load utils op: 0.5048801898956299 seconds + 9: Time to load utils op: 0.5050580501556396 seconds + 9: Time to load utils op: 0.6034388542175293 seconds + 9: Time to load utils op: 0.6033079624176025 seconds + 8: Time to load utils op: 0.6116323471069336 seconds + 8: Time to load utils op: 0.6110455989837646 seconds + 8: Time to load utils op: 0.7630081176757812 secondsTime to load utils op: 0.5121304988861084 seconds + 8: + 8: Time to load utils op: 0.7630181312561035 secondsTime to load utils op: 0.6185076236724854 seconds + 8: + 8: Time to load utils op: 0.5171716213226318 seconds + 8: Time to load utils op: 0.6094648838043213 seconds + 9: Time to load utils op: 0.6036350727081299 seconds + 9: Time to load utils op: 0.6037423610687256 seconds +22: Time to load utils op: 0.637505054473877 seconds +22: Time to load utils op: 0.6547441482543945 seconds +22: Time to load utils op: 0.6399564743041992 secondsTime to load utils op: 0.6537578105926514 seconds +22: +28: Time to load utils op: 0.37691831588745117 seconds +28: Time to load utils op: 0.37659502029418945 seconds +20: Time to load utils op: 0.6473941802978516 seconds +20: Time to load utils op: 0.6630938053131104 seconds +20: Time to load utils op: 0.6473824977874756 seconds +20: Time to load utils op: 0.6630170345306396 seconds +16: Time to load utils op: 0.6548614501953125 seconds +16: Time to load utils op: 0.6494412422180176 secondsTime to load utils op: 0.5049982070922852 seconds +16: +20: Time to load utils op: 0.5028359889984131 seconds +20: Time to load utils op: 0.5033493041992188 seconds +23: Time to load utils op: 0.5036108493804932 secondsTime to load utils op: 0.5036704540252686 seconds +23: +16: Time to load utils op: 0.5024404525756836 seconds +10: Time to load utils op: 0.5244503021240234 secondsTime to load utils op: 0.7615101337432861 seconds +10: +14: Time to load utils op: 0.6249353885650635 seconds +14: Time to load utils op: 0.7571377754211426 seconds +14: Time to load utils op: 0.6251101493835449 seconds +10: Time to load utils op: 0.7614555358886719 seconds +14: Time to load utils op: 0.524993896484375 secondsTime to load utils op: 0.6103222370147705 seconds +14: Time to load utils op: 0.523761510848999 seconds +10: Time to load utils op: 0.6244323253631592 secondsTime to load utils op: 0.5242915153503418 seconds +10: +10: Time to load utils op: 0.6243798732757568 secondsTime to load utils op: 0.61395263671875 seconds +10: +14: +10: Time to load utils op: 0.6139044761657715 seconds +14: Time to load utils op: 0.6101114749908447 seconds +14: Time to load utils op: 0.7553789615631104 seconds +18: Time to load utils op: 0.503535270690918 seconds +18: Time to load utils op: 0.5032339096069336 seconds +22: Time to load utils op: 0.5034031867980957 seconds +22: Time to load utils op: 0.5035736560821533 seconds +27: Time to load utils op: 0.375333309173584 seconds +27: Time to load utils op: 0.3753824234008789 seconds + 3: Time to load utils op: 0.39586830139160156 secondsTime to load utils op: 0.4019327163696289 seconds + 3: +19: Time to load utils op: 0.503997802734375 seconds + 4: Time to load utils op: 0.43061161041259766 seconds + 4: Time to load utils op: 0.42981815338134766 seconds +26: Time to load utils op: 0.3466982841491699 seconds +26: Time to load utils op: 0.3487203121185303 seconds +17: Time to load utils op: 0.6555821895599365 secondsTime to load utils op: 0.5060997009277344 seconds +17: +17: Time to load utils op: 0.6556594371795654 seconds +17: Time to load utils op: 0.5062394142150879 seconds +25: Time to load utils op: 0.34867024421691895 seconds +12: Time to load utils op: 0.6252782344818115 seconds +12: Time to load utils op: 0.756960391998291 seconds +25: Time to load utils op: 0.3486919403076172 seconds +12: Time to load utils op: 0.524432897567749 seconds +12: Time to load utils op: 0.5242776870727539 seconds +12: Time to load utils op: 0.6135294437408447 seconds +12: Time to load utils op: 0.6247193813323975 secondsTime to load utils op: 0.7569403648376465 seconds +12: +12: Time to load utils op: 0.6135334968566895 seconds +19: Time to load utils op: 0.5035393238067627 seconds +13: Time to load utils op: 0.7623779773712158 seconds +13: Time to load utils op: 0.629141092300415 seconds +13: Time to load utils op: 0.6168467998504639 secondsTime to load utils op: 0.7623577117919922 seconds +13: +13: Time to load utils op: 0.6291086673736572 secondsTime to load utils op: 0.5278606414794922 secondsTime to load utils op: 0.6169326305389404 seconds +13: +13: +13: Time to load utils op: 0.5280539989471436 seconds +15: Time to load utils op: 0.5263903141021729 secondsTime to load utils op: 0.7589719295501709 secondsTime to load utils op: 0.6280527114868164 seconds +15: +15: +15: Time to load utils op: 0.5273325443267822 seconds +15: Time to load utils op: 0.7568120956420898 seconds +15: Time to load utils op: 0.6129376888275146 seconds +15: Time to load utils op: 0.6265995502471924 secondsTime to load utils op: 0.6131298542022705 seconds +15: +21: Time to load utils op: 0.5034613609313965 seconds +21: Time to load utils op: 0.5035789012908936 seconds + 0: [2023-05-25 13:37:56,752] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False + 0: [2023-05-25 13:37:56,752] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer + 0: [2023-05-25 13:37:56,752] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +11: Time to load utils op: 0.649294376373291 seconds +11: Time to load utils op: 0.6384930610656738 seconds +11: Time to load utils op: 0.7818303108215332 secondsTime to load utils op: 0.5526106357574463 secondsTime to load utils op: 0.7818324565887451 seconds +11: +11: + 0: [2023-05-25 13:37:56,755] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +11: Time to load utils op: 0.6502220630645752 seconds +11: Time to load utils op: 0.5489628314971924 seconds + 0: [2023-05-25 13:37:56,755] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +11: Time to load utils op: 0.6385819911956787 seconds + 7: Time to load utils op: 0.40544700622558594 seconds + 7: Time to load utils op: 0.40546727180480957 seconds + 1: Time to load utils op: 0.42217397689819336 secondsTime to load utils op: 0.42217302322387695 seconds + 1: +29: Time to load utils op: 0.4243769645690918 seconds +29: Time to load utils op: 0.42180967330932617 seconds +24: Time to load utils op: 0.3813297748565674 seconds +24: Time to load utils op: 0.3812861442565918 seconds +30: Time to load utils op: 0.3537108898162842 seconds +30: Time to load utils op: 0.3539261817932129 seconds + 5: Time to load utils op: 0.47379064559936523 secondsTime to load utils op: 0.4736306667327881 seconds + 5: + 2: Time to load utils op: 0.41500210762023926 secondsTime to load utils op: 0.41503238677978516 seconds + 2: + 0: Time to load utils op: 0.5198867321014404 seconds + 0: Time to load utils op: 0.5251860618591309 seconds + 0: [2023-05-25 13:37:56,878] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer + 0: [2023-05-25 13:37:56,879] [INFO] [utils.py:828:see_memory_usage] MA 1.15 GB Max_MA 1.18 GB CA 1.21 GB Max_CA 1 GB + 0: [2023-05-25 13:37:56,879] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.22 GB, percent = 8.4% +13: Time to load utils op: 0.0004911422729492188 seconds +13: Time to load utils op: 0.0005314350128173828 seconds +13: Time to load utils op: 0.0005457401275634766 seconds +13: Time to load utils op: 0.0003991127014160156 seconds +13: Time to load utils op: 0.000438690185546875 seconds +13: Time to load utils op: 0.0003879070281982422 seconds + 3: Time to load utils op: 0.0006628036499023438 seconds + 3: Time to load utils op: 0.0007040500640869141 seconds +14: Time to load utils op: 0.0005333423614501953 secondsTime to load utils op: 0.0004971027374267578 seconds +14: +14: Time to load utils op: 0.00039458274841308594 seconds +14: Time to load utils op: 0.0004482269287109375 secondsTime to load utils op: 0.0004658699035644531 seconds +14: +14: Time to load utils op: 0.0004975795745849609 seconds +14: Time to load utils op: 0.0004334449768066406 seconds +14: Time to load utils op: 0.00048804283142089844 seconds + 4: Time to load utils op: 0.0007066726684570312 seconds + 4: Time to load utils op: 0.0009107589721679688 seconds +10: Time to load utils op: 0.0005564689636230469 seconds +10: Time to load utils op: 0.00055694580078125 secondsTime to load utils op: 0.0005605220794677734 seconds +10: +10: Time to load utils op: 0.0006020069122314453 secondsTime to load utils op: 0.000598907470703125 secondsTime to load utils op: 0.0005743503570556641 seconds +10: +10: +10: Time to load utils op: 0.0005400180816650391 seconds +10: Time to load utils op: 0.0006353855133056641 seconds +28: Time to load utils op: 0.0005469322204589844 seconds +28: Time to load utils op: 0.0005781650543212891 seconds +21: Time to load utils op: 0.000453948974609375 seconds +21: Time to load utils op: 0.0004487037658691406 seconds +21: Time to load utils op: 0.0004787445068359375 seconds +21: Time to load utils op: 0.0004916191101074219 seconds +21: Time to load utils op: 0.00048089027404785156 seconds +21: Time to load utils op: 0.0009932518005371094 seconds +31: Time to load utils op: 0.0010154247283935547 seconds +31: Time to load utils op: 0.0008137226104736328 seconds +19: Time to load utils op: 0.0005137920379638672 seconds +19: Time to load utils op: 0.0005669593811035156 seconds +19: Time to load utils op: 0.0005655288696289062 seconds +19: Time to load utils op: 0.0005581378936767578 seconds +19: Time to load utils op: 0.0005652904510498047 seconds +19: Time to load utils op: 0.0005533695220947266 secondsTime to load utils op: 0.0005691051483154297 seconds +19: +19: Time to load utils op: 0.000537872314453125 seconds +24: Time to load utils op: 0.0008769035339355469 seconds +24: Time to load utils op: 0.0008220672607421875 seconds +30: Time to load utils op: 0.0005125999450683594 seconds +30: Time to load utils op: 0.0005364418029785156 seconds + 0: Time to load utils op: 0.0007154941558837891 seconds + 0: Time to load utils op: 0.0008318424224853516 seconds +18: Time to load utils op: 0.0005295276641845703 seconds +18: Time to load utils op: 0.00042819976806640625 seconds +18: Time to load utils op: 0.00043773651123046875 seconds +18: Time to load utils op: 0.0004324913024902344 seconds +18: Time to load utils op: 0.0003883838653564453 seconds +18: Time to load utils op: 0.0004742145538330078 seconds +17: Time to load utils op: 0.00046706199645996094 seconds +17: Time to load utils op: 0.00047469139099121094 seconds +17: Time to load utils op: 0.00048232078552246094 seconds +17: Time to load utils op: 0.0005714893341064453 seconds +16: Time to load utils op: 0.0004658699035644531 secondsTime to load utils op: 0.00046181678771972656 seconds +16: +16: Time to load utils op: 0.0004951953887939453 seconds +16: Time to load utils op: 0.0005321502685546875 seconds +15: Time to load utils op: 0.0005424022674560547 seconds +15: Time to load utils op: 0.000530242919921875 seconds +15: Time to load utils op: 0.0004715919494628906 seconds +15: Time to load utils op: 0.0005218982696533203 seconds +15: Time to load utils op: 0.0005478858947753906 secondsTime to load utils op: 0.0005428791046142578 seconds +15: +15: Time to load utils op: 0.0005142688751220703 seconds +15: Time to load utils op: 0.0005137920379638672 seconds +26: Time to load utils op: 0.0007157325744628906 seconds +26: Time to load utils op: 0.0008149147033691406 seconds + 9: Time to load utils op: 0.0005331039428710938 seconds + 9: Time to load utils op: 0.0005381107330322266 secondsTime to load utils op: 0.0005323886871337891 seconds + 9: + 9: Time to load utils op: 0.0005395412445068359 seconds + 9: Time to load utils op: 0.0005598068237304688 secondsTime to load utils op: 0.0005774497985839844 secondsTime to load utils op: 0.0005488395690917969 seconds + 9: + 9: + 9: Time to load utils op: 0.0004863739013671875 seconds +25: Time to load utils op: 0.0007739067077636719 seconds +25: Time to load utils op: 0.0006783008575439453 seconds +25: ninja: no work to do. + 5: Time to load utils op: 0.0008363723754882812 seconds + 5: Time to load utils op: 0.0007798671722412109 seconds +12: Time to load utils op: 0.000560760498046875 seconds +12: Time to load utils op: 0.0005450248718261719 seconds +12: Time to load utils op: 0.0005784034729003906 seconds +12: Time to load utils op: 0.0004837512969970703 secondsTime to load utils op: 0.0005052089691162109 seconds +12: +12: Time to load utils op: 0.0004963874816894531 seconds +12: Time to load utils op: 0.0004868507385253906 seconds +12: Time to load utils op: 0.0005054473876953125 seconds +22: Time to load utils op: 0.0005154609680175781 seconds +22: Time to load utils op: 0.0004169940948486328 seconds +22: Time to load utils op: 0.0004322528839111328 seconds +22: Time to load utils op: 0.00042700767517089844 seconds + 8: Time to load utils op: 0.0004940032958984375 seconds + 8: Time to load utils op: 0.0004024505615234375 seconds + 8: Time to load utils op: 0.0004024505615234375 seconds + 8: Time to load utils op: 0.0005018711090087891 seconds +23: Time to load utils op: 0.0009386539459228516 seconds +22: Time to load utils op: 0.0004756450653076172 seconds + 8: Time to load utils op: 0.0004134178161621094 seconds +23: Time to load utils op: 0.000978231430053711 seconds +22: Time to load utils op: 0.0004837512969970703 seconds +23: Time to load utils op: 0.0009717941284179688 secondsTime to load utils op: 0.000989675521850586 seconds + 8: Time to load utils op: 0.0004093647003173828 seconds +23: +23: Time to load utils op: 0.00096893310546875 secondsTime to load utils op: 0.000993967056274414 seconds +23: + 8: Time to load utils op: 0.000400543212890625 seconds + 1: Time to load utils op: 0.0008938312530517578 seconds +11: Time to load utils op: 0.0006086826324462891 seconds + 8: Time to load utils op: 0.00042319297790527344 seconds +11: Time to load utils op: 0.00047516822814941406 secondsTime to load utils op: 0.0004935264587402344 seconds +11: +11: Time to load utils op: 0.0004901885986328125 seconds + 7: Time to load utils op: 0.0008356571197509766 secondsTime to load utils op: 0.0008122920989990234 seconds + 7: +11: Time to load utils op: 0.0005433559417724609 seconds +11: Time to load utils op: 0.0005519390106201172 secondsTime to load utils op: 0.0005276203155517578 seconds +11: +11: Time to load utils op: 0.0005927085876464844 seconds + 1: Time to load utils op: 0.0007457733154296875 seconds +29: Time to load utils op: 0.0009691715240478516 seconds +29: Time to load utils op: 0.0008306503295898438 seconds +27: Time to load utils op: 0.0010447502136230469 seconds +20: Time to load utils op: 0.003806591033935547 seconds +20: Time to load utils op: 0.0038080215454101562 seconds +27: Time to load utils op: 0.005916118621826172 seconds +20: Time to load utils op: 0.0038957595825195312 seconds +20: Time to load utils op: 0.003911495208740234 seconds +20: Time to load utils op: 0.003934383392333984 seconds +20: Time to load utils op: 0.003922462463378906 seconds +25: Time to load utils op: 0.20262527465820312 seconds +13: Time to load utils op: 0.00041174888610839844 seconds +13: Time to load utils op: 0.00041985511779785156 seconds +25: Time to load utils op: 0.0006263256072998047 seconds + 2: Time to load utils op: 0.0009274482727050781 seconds + 2: Time to load utils op: 0.0010440349578857422 seconds +29: ninja: no work to do. +29: Time to load utils op: 0.17166757583618164 seconds +29: Time to load utils op: 0.0005838871002197266 seconds + 7: ninja: no work to do. + 3: Time to load utils op: 0.3140723705291748 seconds + 2: Time to load utils op: 0.3137197494506836 seconds + 4: Time to load utils op: 0.6191346645355225 secondsTime to load utils op: 0.6192529201507568 seconds + 4: + 5: Time to load utils op: 0.619361400604248 seconds + 0: Time to load utils op: 0.6335406303405762 seconds + 2: Time to load utils op: 0.6100924015045166 secondsTime to load utils op: 0.314011812210083 seconds + 2: + 2: Time to load utils op: 0.6131505966186523 seconds + 1: Time to load utils op: 0.31455373764038086 seconds + 7: Time to load utils op: 0.6125161647796631 seconds + 3: Time to load utils op: 0.3138618469238281 seconds + 5: Time to load utils op: 0.6199865341186523 seconds + 3: Time to load utils op: 0.6209688186645508 seconds + 3: Time to load utils op: 0.6208791732788086 seconds + 0: Time to load utils op: 0.5101697444915771 seconds + 1: Time to load utils op: 0.3148794174194336 seconds + 1: Time to load utils op: 0.6159496307373047 seconds + 7: Time to load utils op: 0.6136651039123535 seconds + 7: Time to load utils op: 0.2957611083984375 seconds + 1: Time to load utils op: 0.6159713268280029 seconds + 4: Time to load utils op: 0.3027513027191162 seconds + 4: Time to load utils op: 0.3033452033996582 seconds + 5: Time to load utils op: 0.3021821975708008 seconds + 5: Time to load utils op: 0.30245161056518555 seconds + 7: Time to load utils op: 0.302898645401001 seconds + 6: Time to load utils op: 0.3058302402496338 seconds + 6: Time to load utils op: 0.30589842796325684 seconds + 6: Time to load utils op: 0.6255815029144287 seconds + 6: Time to load utils op: 0.6257119178771973 seconds + 3: Time to load utils op: 0.0006623268127441406 seconds + 4: Time to load utils op: 0.0004749298095703125 seconds + 3: Time to load utils op: 0.000354766845703125 seconds + 2: Time to load utils op: 0.0004773139953613281 seconds + 2: Time to load utils op: 0.00036072731018066406 seconds + 4: Time to load utils op: 0.00035190582275390625 seconds + 2: Time to load utils op: 0.0004267692565917969 secondsTime to load utils op: 0.00041985511779785156 seconds + 2: + 4: Time to load utils op: 0.00035190582275390625 seconds + 5: Time to load utils op: 0.0004923343658447266 seconds + 5: Time to load utils op: 0.00045561790466308594 seconds + 4: Time to load utils op: 0.00031876564025878906 seconds + 0: Time to load utils op: 0.0005910396575927734 seconds + 3: Time to load utils op: 0.00034618377685546875 seconds + 3: Time to load utils op: 0.00035881996154785156 seconds + 1: Time to load utils op: 0.0004734992980957031 seconds + 1: Time to load utils op: 0.0003402233123779297 seconds + 5: Time to load utils op: 0.0003216266632080078 seconds + 5: Time to load utils op: 0.0003387928009033203 seconds + 7: Time to load utils op: 0.0009579658508300781 seconds + 1: Time to load utils op: 0.0004901885986328125 seconds + 1: Time to load utils op: 0.0004684925079345703 seconds + 7: Time to load utils op: 0.0006663799285888672 seconds + 7: Time to load utils op: 0.0005333423614501953 seconds + 0: Time to load utils op: 0.2028365135192871 seconds + 7: Time to load utils op: 0.0008783340454101562 seconds + 0: Time to load utils op: 0.20167136192321777 seconds + 2: Time to load utils op: 0.20192480087280273 seconds + 2: Time to load utils op: 0.2021617889404297 seconds + 5: Time to load utils op: 0.2022266387939453 secondsTime to load utils op: 0.20219755172729492 seconds + 5: + 6: Time to load utils op: 0.20230889320373535 secondsTime to load utils op: 0.20223712921142578 seconds + 6: + 3: Time to load utils op: 0.20257854461669922 seconds + 4: Time to load utils op: 0.2022080421447754 seconds + 4: Time to load utils op: 0.20213651657104492 seconds + 3: Time to load utils op: 0.20235562324523926 seconds + 1: Time to load utils op: 0.2022075653076172 seconds + 1: Time to load utils op: 0.2019965648651123 seconds + 7: Time to load utils op: 0.20205378532409668 seconds + 7: Time to load utils op: 0.20262551307678223 seconds +24: Time to load utils op: 0.5034389495849609 seconds +24: Time to load utils op: 0.502748966217041 seconds +25: Time to load utils op: 0.5031239986419678 seconds +25: Time to load utils op: 0.5035305023193359 seconds +26: Time to load utils op: 0.5028486251831055 seconds +26: Time to load utils op: 0.5030839443206787 seconds +24: Time to load utils op: 0.40320706367492676 seconds +25: Time to load utils op: 0.7038285732269287 seconds +29: Time to load utils op: 0.504371166229248 seconds + 0: Time to load utils op: 0.00044846534729003906 seconds +28: Time to load utils op: 0.5061213970184326 seconds +27: Time to load utils op: 0.5058856010437012 seconds +16: Time to load utils op: 1.5145437717437744 seconds +17: Time to load utils op: 1.513885259628296 seconds +17: Time to load utils op: 1.513932228088379 seconds +28: Time to load utils op: 0.5066466331481934 seconds +30: Time to load utils op: 0.5041708946228027 seconds +27: Time to load utils op: 0.5060453414916992 seconds +30: Time to load utils op: 0.504035234451294 seconds +16: Time to load utils op: 1.5149848461151123 seconds +18: Time to load utils op: 1.5119423866271973 seconds +28: Time to load utils op: 0.7038934230804443 seconds +26: Time to load utils op: 0.7036864757537842 seconds +26: Time to load utils op: 0.7036492824554443 seconds +28: Time to load utils op: 0.703188419342041 seconds +25: Time to load utils op: 0.4022367000579834 seconds + 0: Time to load utils op: 0.0004093647003173828 seconds +25: Time to load utils op: 0.40237903594970703 seconds +28: Time to load utils op: 0.4023430347442627 seconds +26: Time to load utils op: 0.40228271484375 seconds +29: Time to load utils op: 0.402202844619751 seconds +28: Time to load utils op: 0.40245580673217773 seconds +26: Time to load utils op: 0.402435302734375 seconds +30: Time to load utils op: 0.40219759941101074 seconds +24: Time to load utils op: 0.4026777744293213 seconds +30: Time to load utils op: 0.40262579917907715 seconds +27: Time to load utils op: 0.40294384956359863 seconds +31: Time to load utils op: 0.5039842128753662 seconds +29: Time to load utils op: 0.40281009674072266 seconds +31: Time to load utils op: 0.5040154457092285 seconds +27: Time to load utils op: 0.4026374816894531 seconds + 5: Time to load utils op: 0.0004558563232421875 seconds +27: Time to load utils op: 0.7041220664978027 seconds + 5: Time to load utils op: 0.0003600120544433594 seconds + 2: Time to load utils op: 0.00035834312438964844 seconds +31: Time to load utils op: 0.7040724754333496 seconds + 2: Time to load utils op: 0.0004260540008544922 seconds +31: Time to load utils op: 0.40274882316589355 seconds +27: Time to load utils op: 0.7046599388122559 seconds + 4: Time to load utils op: 0.00037026405334472656 seconds + 4: Time to load utils op: 0.00036787986755371094 seconds + 3: Time to load utils op: 0.00035190582275390625 seconds +31: Time to load utils op: 0.7046389579772949 seconds +31: Time to load utils op: 0.40326738357543945 seconds +18: Time to load utils op: 1.505786657333374 seconds + 3: Time to load utils op: 0.0003795623779296875 seconds +16: Time to load utils op: 1.404435634613037 seconds +24: Time to load utils op: 0.0004868507385253906 seconds +20: Time to load utils op: 1.5071334838867188 seconds +29: Time to load utils op: 0.7089626789093018 secondsTime to load utils op: 0.7092471122741699 seconds +29: +16: Time to load utils op: 1.404235601425171 seconds + 1: Time to load utils op: 0.00036716461181640625 seconds + 1: Time to load utils op: 0.00037169456481933594 seconds + 7: Time to load utils op: 0.0005249977111816406 seconds +20: Time to load utils op: 1.507591724395752 seconds +21: Time to load utils op: 1.50722074508667 seconds +25: Time to load utils op: 0.0004971027374267578 seconds + 6: Time to load utils op: 1.1128008365631104 seconds +23: Time to load utils op: 1.507453203201294 seconds + 6: Time to load utils op: 1.1131837368011475 seconds +23: Time to load utils op: 1.5077097415924072 seconds + 7: Time to load utils op: 0.0005033016204833984 seconds +24: Time to load utils op: 0.0029964447021484375 seconds +17: Time to load utils op: 1.4048008918762207 seconds +25: Time to load utils op: 0.0004019737243652344 seconds +17: Time to load utils op: 1.4049618244171143 seconds +21: Time to load utils op: 1.5075678825378418 seconds +22: Time to load utils op: 1.5080592632293701 seconds +22: Time to load utils op: 1.508380651473999 seconds +25: Time to load utils op: 0.00039505958557128906 seconds +30: Time to load utils op: 0.7036492824554443 seconds +24: Time to load utils op: 0.00045490264892578125 seconds +30: Time to load utils op: 0.7068638801574707 seconds +24: Time to load utils op: 0.7037913799285889 secondsTime to load utils op: 0.7036895751953125 seconds +24: +25: Time to load utils op: 0.00037598609924316406 seconds +30: Time to load utils op: 0.0005154609680175781 seconds +30: Time to load utils op: 0.0005080699920654297 seconds +17: Time to load utils op: 0.0004990100860595703 seconds +17: Time to load utils op: 0.0003681182861328125 seconds +30: Time to load utils op: 0.00035452842712402344 seconds +30: Time to load utils op: 0.000335693359375 seconds +25: Time to load utils op: 0.0003371238708496094 seconds +26: Time to load utils op: 0.007875442504882812 seconds +26: Time to load utils op: 0.007835149765014648 seconds +16: Time to load utils op: 0.008891105651855469 secondsTime to load utils op: 0.009286165237426758 seconds +16: +24: Time to load utils op: 0.00039505958557128906 seconds +30: Time to load utils op: 0.0003631114959716797 seconds +30: Time to load utils op: 0.0003628730773925781 seconds +22: Time to load utils op: 0.0006701946258544922 seconds +22: Time to load utils op: 0.0006990432739257812 seconds +31: Time to load utils op: 0.0008587837219238281 seconds +31: Time to load utils op: 0.0011551380157470703 seconds +31: Time to load utils op: 0.0007977485656738281 seconds +31: Time to load utils op: 0.0006387233734130859 seconds +31: Time to load utils op: 0.0006992816925048828 seconds +31: Time to load utils op: 0.0005319118499755859 seconds +16: Time to load utils op: 0.000362396240234375 seconds + 0: Time to load utils op: 0.40247488021850586 seconds +16: Time to load utils op: 0.0003695487976074219 seconds + 0: Time to load utils op: 0.4025542736053467 seconds +24: Time to load utils op: 0.000347137451171875 seconds + 6: Time to load utils op: 0.0005364418029785156 seconds + 6: Time to load utils op: 0.0004291534423828125 seconds + 6: Time to load utils op: 0.0004558563232421875 seconds + 6: Time to load utils op: 0.0005381107330322266 seconds + 6: Time to load utils op: 0.0005307197570800781 seconds + 6: Time to load utils op: 0.0005469322204589844 seconds + 6: Time to load utils op: 0.0005400180816650391 seconds + 6: Time to load utils op: 0.0006139278411865234 seconds +24: Time to load utils op: 0.0003402233123779297 seconds +28: Time to load utils op: 0.0059223175048828125 seconds +28: Time to load utils op: 0.005858182907104492 seconds +28: Time to load utils op: 0.005786418914794922 seconds +28: Time to load utils op: 0.005945682525634766 seconds +28: Time to load utils op: 0.00642085075378418 seconds +28: Time to load utils op: 0.006276130676269531 seconds + 0: [2023-05-25 13:37:57,516] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 + 0: [2023-05-25 13:37:57,516] [INFO] [utils.py:828:see_memory_usage] MA 1.15 GB Max_MA 1.15 GB CA 1.21 GB Max_CA 1 GB + 0: [2023-05-25 13:37:57,516] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.27 GB, percent = 8.4% + 0: Time to load utils op: 0.0003707408905029297 seconds +29: Time to load utils op: 0.005079746246337891 seconds +29: Time to load utils op: 0.004809379577636719 seconds + 0: Time to load utils op: 0.0004894733428955078 seconds +29: Time to load utils op: 0.005235195159912109 seconds +29: Time to load utils op: 0.00046062469482421875 seconds +27: Time to load utils op: 0.0004982948303222656 seconds +27: Time to load utils op: 0.00038123130798339844 seconds +27: Time to load utils op: 0.00037550926208496094 seconds +27: Time to load utils op: 0.0003502368927001953 seconds +27: Time to load utils op: 0.0003502368927001953 seconds +27: Time to load utils op: 0.0004169940948486328 seconds +18: Time to load utils op: 0.004503011703491211 secondsTime to load utils op: 0.004517555236816406 seconds +18: +20: Time to load utils op: 0.003946065902709961 seconds +17: Time to load utils op: 0.0003981590270996094 seconds +17: Time to load utils op: 0.0003781318664550781 seconds +23: Time to load utils op: 0.004816293716430664 seconds +23: Time to load utils op: 0.004965066909790039 seconds +29: Time to load utils op: 0.0004379749298095703 seconds +20: Time to load utils op: 0.00035881996154785156 seconds +26: Time to load utils op: 0.0003616809844970703 seconds +26: Time to load utils op: 0.0003590583801269531 seconds +26: Time to load utils op: 0.00037407875061035156 seconds +26: Time to load utils op: 0.00048232078552246094 seconds + 0: [2023-05-25 13:37:57,650] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 + 0: [2023-05-25 13:37:57,651] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.14 GB Max_CA 3 GB + 0: [2023-05-25 13:37:57,651] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.31 GB, percent = 8.4% +21: Time to load utils op: 0.004395008087158203 seconds +21: Time to load utils op: 0.003838777542114258 seconds + 0: [2023-05-25 13:37:57,787] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 + 0: [2023-05-25 13:37:57,788] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.14 GB Max_CA 3 GB + 0: [2023-05-25 13:37:57,788] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.34 GB, percent = 8.4% + 0: [2023-05-25 13:37:57,917] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 + 0: [2023-05-25 13:37:57,917] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:57,918] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.46 GB, percent = 8.4% + 0: [2023-05-25 13:37:58,046] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 + 0: [2023-05-25 13:37:58,047] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:58,047] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.61 GB, percent = 8.5% + 0: [2023-05-25 13:37:58,158] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 + 0: [2023-05-25 13:37:58,159] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:58,159] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.62 GB, percent = 8.5% + 0: [2023-05-25 13:37:58,267] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer + 0: [2023-05-25 13:37:58,267] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:58,267] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.62 GB, percent = 8.5% + 0: [2023-05-25 13:37:58,380] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer + 0: [2023-05-25 13:37:58,380] [INFO] [utils.py:828:see_memory_usage] MA 3.87 GB Max_MA 3.87 GB CA 5.04 GB Max_CA 5 GB + 0: [2023-05-25 13:37:58,380] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.57 GB, percent = 8.5% + 0: [2023-05-25 13:37:58,487] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer + 0: [2023-05-25 13:37:58,488] [INFO] [utils.py:828:see_memory_usage] MA 3.87 GB Max_MA 3.87 GB CA 5.04 GB Max_CA 5 GB + 0: [2023-05-25 13:37:58,488] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.57 GB, percent = 8.5% + 0: [2023-05-25 13:37:58,488] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam + 0: [2023-05-25 13:37:58,488] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler + 0: [2023-05-25 13:37:58,489] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = + 0: [2023-05-25 13:37:58,489] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] + 0: [2023-05-25 13:37:58,489] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: + 0: [2023-05-25 13:37:58,489] [INFO] [config.py:1011:print] activation_checkpointing_config { + 0: "partition_activations": false, + 0: "contiguous_memory_optimization": false, + 0: "cpu_checkpointing": false, + 0: "number_checkpoints": null, + 0: "synchronize_checkpoint_boundary": false, + 0: "profile": false + 0: } + 0: [2023-05-25 13:37:58,489] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} + 0: [2023-05-25 13:37:58,489] [INFO] [config.py:1011:print] amp_enabled .................. False + 0: [2023-05-25 13:37:58,489] [INFO] [config.py:1011:print] amp_params ................... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] autotuning_config ............ { + 0: "enabled": false, + 0: "start_step": null, + 0: "end_step": null, + 0: "metric_path": null, + 0: "arg_mappings": null, + 0: "metric": "throughput", + 0: "model_info": null, + 0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", + 0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", + 0: "overwrite": true, + 0: "fast": true, + 0: "start_profile_step": 3, + 0: "end_profile_step": 5, + 0: "tuner_type": "gridsearch", + 0: "tuner_early_stopping": 5, + 0: "tuner_num_trials": 50, + 0: "model_info_path": null, + 0: "mp_size": 1, + 0: "max_train_batch_size": null, + 0: "min_train_batch_size": 1, + 0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, + 0: "min_train_micro_batch_size_per_gpu": 1, + 0: "num_tuning_micro_batch_sizes": 3 + 0: } + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] bfloat16_enabled ............. True + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] comms_config ................. + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] communication_data_type ...... None + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa + 0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] curriculum_enabled ........... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] curriculum_params ............ False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] dataloader_drop_last ......... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] disable_allgather ............ False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] dump_state ................... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ bert.encoder.layer + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] elasticity_enabled ........... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] flops_profiler_config ........ { + 0: "enabled": false, + 0: "profile_step": 1, + 0: "module_depth": -1, + 0: "top_modules": 1, + 0: "detailed": true, + 0: "output_file": null + 0: } + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] fp16_auto_cast ............... None + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] fp16_enabled ................. False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] global_rank .................. 0 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 32 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] load_universal_checkpoint .... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] loss_scale ................... 1.0 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] memory_breakdown ............. False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] monitor_config ............... + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] nebula_config ................ { + 0: "enabled": false, + 0: "persistent_storage_path": null, + 0: "persistent_time_interval": 100, + 0: "num_of_version_in_retention": 2, + 0: "enable_nebula_load": true, + 0: "load_path": null + 0: } + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] optimizer_name ............... None + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] optimizer_params ............. None + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] pld_enabled .................. False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] pld_params ................... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] prescale_gradients ........... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] scheduler_name ............... None + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] scheduler_params ............. None + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] sparse_attention ............. None + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] steps_per_print .............. 2000 + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] train_batch_size ............. 512 + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 1 + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] use_node_local_storage ....... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] wall_clock_breakdown ......... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] world_size ................... 16 + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] zero_enabled ................. False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] zero_optimization_stage ...... 0 + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:996:print_user_config] json = { + 0: "train_micro_batch_size_per_gpu": 1, + 0: "train_batch_size": 512, + 0: "gradient_clipping": 1.0, + 0: "zero_optimization": { + 0: "stage": 0 + 0: }, + 0: "bf16": { + 0: "enabled": true + 0: }, + 0: "steps_per_print": 2.000000e+03, + 0: "wall_clock_breakdown": false + 0: } + 0: Time to load utils op: 0.0004456043243408203 seconds + 0: [2023-05-25 13:37:58,492] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=32 micro_batch_size=1 + 0: [2023-05-25 13:37:58,956] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:37:58,956] [INFO] [engine.py:145:__init__] RANK=3 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:37:58,956] [INFO] [engine.py:145:__init__] RANK=2 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:37:58,956] [INFO] [engine.py:145:__init__] RANK=1 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=64 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=67 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=66 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=65 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=128 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=129 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=130 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=131 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=193 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=194 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=195 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=192 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... + 3: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 6: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... + 6: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... + 6: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... + 2: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +14: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +14: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... + 2: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... + 1: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 1: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 4: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 7: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +28: [2023-05-25 13:38:00,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +28: [2023-05-25 13:38:00,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +27: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +31: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +31: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +27: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +16: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +29: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +26: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +24: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +22: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +16: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +26: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +24: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +25: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +29: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +28: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +21: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +23: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +28: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +23: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +18: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +17: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +21: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +13: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +27: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +19: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +22: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +27: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +13: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +19: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +31: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +16: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +16: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +29: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +17: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +24: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +10: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +14: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +29: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +22: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +24: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +10: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +13: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +23: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +22: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +14: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. + 8: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +25: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +13: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +17: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +25: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +18: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +10: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +13: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +19: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +17: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +30: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +13: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +19: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +10: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +14: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +30: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +10: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 8: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +12: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +14: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +10: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +15: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +14: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +12: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 8: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 8: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 0: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 3: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. +10: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 7: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 6: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 2: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. +15: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 1: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 4: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 6: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 7: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 3: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 0: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +12: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 6: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 2: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 0: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 4: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 3: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. +12: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 0: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 2: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. +12: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 7: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 2: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. +15: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 2: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 1: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 4: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +15: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 1: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 3: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 0: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 6: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 2: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 1: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 4: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 1: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 6: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 2: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 7: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 3: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. +15: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 7: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 6: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +13: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. + 0: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +13: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +14: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +13: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +14: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. + 9: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +13: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +14: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. + 8: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. + 8: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +12: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +10: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +15: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. + 9: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +24: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +24: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +12: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +24: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +30: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +28: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +27: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +30: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +28: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +26: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +16: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. + 5: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 5: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 5: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +25: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +29: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +16: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +28: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +25: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +24: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +30: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +28: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +27: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +26: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +30: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +29: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +30: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +26: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +26: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +15: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +17: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +19: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +16: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +31: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +29: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +17: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +16: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +21: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +19: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +25: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +23: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +30: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +28: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +22: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +29: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +18: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +25: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +21: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +26: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +23: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +25: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +30: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +19: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +17: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +26: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +22: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +29: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +17: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +30: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +23: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +31: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +23: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +25: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +15: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +31: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +13: [2023-05-25 13:38:00,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +16: [2023-05-25 13:38:00,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +16: [2023-05-25 13:38:00,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +13: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +19: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. + 8: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +14: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. + 9: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +17: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +19: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +14: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +10: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +28: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. + 9: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +17: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +10: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +16: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +13: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +28: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +16: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +15: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +19: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +22: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +13: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +29: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. + 8: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +17: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +19: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +14: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +30: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +29: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. + 9: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +28: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +12: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +10: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +17: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +26: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +10: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +24: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +23: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +30: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +27: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +28: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +12: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +26: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +24: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +21: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +27: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +23: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +22: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +18: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +16: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +22: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +29: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +16: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +25: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +17: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +18: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +29: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +19: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +25: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +24: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +15: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +27: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +23: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +31: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +30: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +21: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +17: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +26: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +15: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +24: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +23: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +31: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +22: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +16: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +22: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +16: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +17: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +19: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +12: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +21: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +25: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +12: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +17: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +22: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +23: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +31: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +21: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +20: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +11: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +20: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +11: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +20: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +20: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +11: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +11: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +20: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +11: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +20: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +11: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +11: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +20: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +20: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +11: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +20: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... + 9: [2023-05-25 13:38:00,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... + 9: [2023-05-25 13:38:00,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... + 9: [2023-05-25 13:38:00,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +14: [2023-05-25 13:38:00,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +14: [2023-05-25 13:38:00,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +14: [2023-05-25 13:38:00,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +14: [2023-05-25 13:38:00,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +14: [2023-05-25 13:38:00,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +10: [2023-05-25 13:38:00,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +10: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +10: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +10: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +10: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +10: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +13: [2023-05-25 13:38:00,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +12: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +15: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... + 8: [2023-05-25 13:38:00,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +12: [2023-05-25 13:38:00,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +12: [2023-05-25 13:38:00,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... + 8: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... + 8: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +15: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +12: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +12: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +15: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +15: [2023-05-25 13:38:00,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +15: [2023-05-25 13:38:00,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +15: [2023-05-25 13:38:00,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +13: [2023-05-25 13:38:00,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +13: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +13: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +13: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +13: [2023-05-25 13:38:00,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +13: [2023-05-25 13:38:00,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... + 9: [2023-05-25 13:38:00,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +10: [2023-05-25 13:38:00,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +11: [2023-05-25 13:38:00,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +15: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +16: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +16: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +16: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... + 2: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... +16: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +16: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... + 4: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +12: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +16: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +11: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +16: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +16: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +13: [2023-05-25 13:38:00,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,573] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +25: [2023-05-25 13:38:00,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +10: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +15: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 4: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +25: [2023-05-25 13:38:00,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +15: [2023-05-25 13:38:00,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:38:00,588] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +16: [2023-05-25 13:38:00,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +17: [2023-05-25 13:38:00,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... + 0: [2023-05-25 13:38:00,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +17: [2023-05-25 13:38:00,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +17: [2023-05-25 13:38:00,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +17: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +17: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +17: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... + 0: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 0: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... +17: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +16: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +17: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 0: [2023-05-25 13:38:00,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +25: [2023-05-25 13:38:00,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +16: [2023-05-25 13:38:00,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:00,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +25: [2023-05-25 13:38:00,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 6: [2023-05-25 13:38:00,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +22: [2023-05-25 13:38:00,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 6: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,620] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +22: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +22: [2023-05-25 13:38:00,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +22: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +23: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +20: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +20: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +20: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +20: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +23: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +23: [2023-05-25 13:38:00,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +23: [2023-05-25 13:38:00,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +23: [2023-05-25 13:38:00,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +20: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +20: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +20: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +20: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +27: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +20: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +20: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +20: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +20: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +25: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:38:00,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +28: [2023-05-25 13:38:00,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +28: [2023-05-25 13:38:00,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +17: [2023-05-25 13:38:00,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +28: [2023-05-25 13:38:00,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +28: [2023-05-25 13:38:00,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +28: [2023-05-25 13:38:00,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:38:00,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... + 0: [2023-05-25 13:38:00,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +29: [2023-05-25 13:38:00,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +29: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +29: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +26: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +26: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +26: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +26: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +24: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +24: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +29: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +29: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +17: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +24: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +31: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +29: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +24: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +31: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +29: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +24: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +31: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +31: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +31: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +24: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +31: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +26: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... +29: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +29: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +26: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +29: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +26: [2023-05-25 13:38:00,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +26: [2023-05-25 13:38:00,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 1: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 1: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 1: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 1: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 1: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +18: [2023-05-25 13:38:00,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +23: [2023-05-25 13:38:00,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +23: [2023-05-25 13:38:00,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 6: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +18: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +27: [2023-05-25 13:38:00,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +20: [2023-05-25 13:38:00,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +27: [2023-05-25 13:38:00,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +23: [2023-05-25 13:38:00,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +18: [2023-05-25 13:38:00,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:38:00,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:00,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +28: [2023-05-25 13:38:00,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +22: [2023-05-25 13:38:00,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:00,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +28: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +22: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +24: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +26: [2023-05-25 13:38:00,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +30: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +21: [2023-05-25 13:38:00,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +31: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +21: [2023-05-25 13:38:00,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +24: [2023-05-25 13:38:00,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +19: [2023-05-25 13:38:00,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +19: [2023-05-25 13:38:00,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:38:00,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:38:00,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +19: [2023-05-25 13:38:00,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +19: [2023-05-25 13:38:00,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +19: [2023-05-25 13:38:00,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +19: [2023-05-25 13:38:00,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +21: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +21: [2023-05-25 13:38:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. + 9: [2023-05-25 13:38:00,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +19: [2023-05-25 13:38:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +14: [2023-05-25 13:38:00,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +11: [2023-05-25 13:38:00,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +11: [2023-05-25 13:38:00,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +14: [2023-05-25 13:38:00,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +11: [2023-05-25 13:38:00,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 2: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 8: [2023-05-25 13:38:00,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +14: [2023-05-25 13:38:00,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +14: [2023-05-25 13:38:00,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 2: [2023-05-25 13:38:00,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +15: [2023-05-25 13:38:00,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +23: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +23: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +13: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +14: [2023-05-25 13:38:00,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 9: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +13: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +14: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:38:00,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +11: [2023-05-25 13:38:00,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +14: [2023-05-25 13:38:00,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +14: [2023-05-25 13:38:00,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 9: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 6: [2023-05-25 13:38:00,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 6: [2023-05-25 13:38:00,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +11: [2023-05-25 13:38:00,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +11: [2023-05-25 13:38:00,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 4: [2023-05-25 13:38:00,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +11: [2023-05-25 13:38:00,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:00,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +14: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +12: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +11: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +18: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. + 9: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 9: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +18: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. + 8: [2023-05-25 13:38:00,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +22: [2023-05-25 13:38:00,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +11: [2023-05-25 13:38:00,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +22: [2023-05-25 13:38:00,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +15: [2023-05-25 13:38:00,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +20: [2023-05-25 13:38:00,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +20: [2023-05-25 13:38:00,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. + 8: [2023-05-25 13:38:00,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +17: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +17: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +10: [2023-05-25 13:38:00,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +15: [2023-05-25 13:38:00,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +10: [2023-05-25 13:38:00,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +18: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 3: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +10: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +18: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +18: [2023-05-25 13:38:00,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +12: [2023-05-25 13:38:00,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 6: [2023-05-25 13:38:00,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +18: [2023-05-25 13:38:00,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +12: [2023-05-25 13:38:00,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +22: [2023-05-25 13:38:00,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +22: [2023-05-25 13:38:00,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. + 4: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +17: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +26: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +15: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +26: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. + 0: [2023-05-25 13:38:00,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +17: [2023-05-25 13:38:00,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:00,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +16: [2023-05-25 13:38:00,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +20: [2023-05-25 13:38:00,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +23: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +20: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +17: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +13: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +15: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +22: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +13: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 3: [2023-05-25 13:38:00,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +22: [2023-05-25 13:38:00,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +10: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +18: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +18: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... + 9: [2023-05-25 13:38:00,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... + 9: [2023-05-25 13:38:00,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... + 8: [2023-05-25 13:38:00,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +13: [2023-05-25 13:38:00,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +26: [2023-05-25 13:38:00,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +15: [2023-05-25 13:38:00,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +15: [2023-05-25 13:38:00,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 4: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +16: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +22: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +24: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +16: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +24: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +13: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +17: [2023-05-25 13:38:00,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:00,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +14: [2023-05-25 13:38:00,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +14: [2023-05-25 13:38:00,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +18: [2023-05-25 13:38:00,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +18: [2023-05-25 13:38:00,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +14: [2023-05-25 13:38:00,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +16: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +16: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +14: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +22: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +13: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 1: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 1: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +15: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 5: [2023-05-25 13:38:00,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +14: [2023-05-25 13:38:00,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +13: [2023-05-25 13:38:00,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +12: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +12: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 5: [2023-05-25 13:38:00,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +12: [2023-05-25 13:38:00,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +11: [2023-05-25 13:38:00,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +16: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +28: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +26: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +26: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +28: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +16: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +27: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +24: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +18: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +18: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +29: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +10: [2023-05-25 13:38:00,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +14: [2023-05-25 13:38:00,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +29: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +15: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +24: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +31: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +10: [2023-05-25 13:38:00,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +14: [2023-05-25 13:38:00,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +31: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +24: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +15: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +11: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +27: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +30: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. + 4: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +16: [2023-05-25 13:38:00,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +27: [2023-05-25 13:38:00,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +16: [2023-05-25 13:38:00,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. + 9: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +30: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +25: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +25: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +10: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +10: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 3: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. + 3: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +30: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. + 3: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 3: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +25: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +15: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +30: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +25: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +19: [2023-05-25 13:38:00,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +26: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +26: [2023-05-25 13:38:00,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +15: [2023-05-25 13:38:00,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. + 1: [2023-05-25 13:38:00,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +20: [2023-05-25 13:38:00,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +20: [2023-05-25 13:38:00,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 5: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +12: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +28: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. + 1: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 6: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +29: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +12: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +26: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +31: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +24: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +28: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +10: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +10: [2023-05-25 13:38:00,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +23: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +31: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +12: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +17: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +20: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +31: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +10: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +20: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +31: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +10: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +17: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +28: [2023-05-25 13:38:00,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +21: [2023-05-25 13:38:00,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +15: [2023-05-25 13:38:00,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +21: [2023-05-25 13:38:00,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +15: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +26: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +12: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +12: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +30: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +19: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +30: [2023-05-25 13:38:00,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:00,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +25: [2023-05-25 13:38:00,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +19: [2023-05-25 13:38:00,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +27: [2023-05-25 13:38:00,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +25: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +14: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +20: [2023-05-25 13:38:00,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +30: [2023-05-25 13:38:00,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +10: [2023-05-25 13:38:00,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +20: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +29: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +12: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. + 7: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +12: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +29: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +13: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 7: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 7: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +28: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +13: [2023-05-25 13:38:00,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +10: [2023-05-25 13:38:00,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +17: [2023-05-25 13:38:00,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +31: [2023-05-25 13:38:00,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +11: [2023-05-25 13:38:00,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +19: [2023-05-25 13:38:00,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +21: [2023-05-25 13:38:00,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +12: [2023-05-25 13:38:00,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +21: [2023-05-25 13:38:00,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +20: [2023-05-25 13:38:00,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +21: [2023-05-25 13:38:00,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +31: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +20: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +21: [2023-05-25 13:38:00,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +11: [2023-05-25 13:38:00,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +14: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +10: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +21: [2023-05-25 13:38:00,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +21: [2023-05-25 13:38:00,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +10: [2023-05-25 13:38:00,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +24: [2023-05-25 13:38:00,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +24: [2023-05-25 13:38:00,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +13: [2023-05-25 13:38:00,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +13: [2023-05-25 13:38:00,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +12: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +15: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +10: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +19: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +19: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +19: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +13: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +19: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +13: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +27: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 7: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +27: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +11: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:00,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +21: [2023-05-25 13:38:00,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +15: [2023-05-25 13:38:00,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +21: [2023-05-25 13:38:00,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +11: [2023-05-25 13:38:00,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,948] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +12: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +15: [2023-05-25 13:38:00,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +15: [2023-05-25 13:38:00,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +21: [2023-05-25 13:38:00,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +24: [2023-05-25 13:38:00,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +22: [2023-05-25 13:38:00,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +22: [2023-05-25 13:38:00,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +24: [2023-05-25 13:38:00,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +21: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +15: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +13: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +12: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +19: [2023-05-25 13:38:00,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +12: [2023-05-25 13:38:00,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +12: [2023-05-25 13:38:00,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +28: [2023-05-25 13:38:00,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +29: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +28: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +29: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +15: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +27: [2023-05-25 13:38:00,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:00,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +25: [2023-05-25 13:38:00,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +15: [2023-05-25 13:38:00,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +15: [2023-05-25 13:38:00,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +15: [2023-05-25 13:38:00,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +10: [2023-05-25 13:38:00,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +10: [2023-05-25 13:38:00,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +29: [2023-05-25 13:38:00,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +13: [2023-05-25 13:38:00,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +22: [2023-05-25 13:38:00,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +29: [2023-05-25 13:38:00,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +10: [2023-05-25 13:38:00,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +28: [2023-05-25 13:38:00,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +22: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +25: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +10: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +28: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +13: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +28: [2023-05-25 13:38:00,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:00,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +10: [2023-05-25 13:38:00,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +28: [2023-05-25 13:38:00,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +28: [2023-05-25 13:38:00,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +28: [2023-05-25 13:38:00,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +30: [2023-05-25 13:38:00,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +28: [2023-05-25 13:38:00,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +10: [2023-05-25 13:38:00,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +28: [2023-05-25 13:38:00,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +28: [2023-05-25 13:38:00,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:00,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:00,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:00,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:00,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:00,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +31: [2023-05-25 13:38:00,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +12: [2023-05-25 13:38:00,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +31: [2023-05-25 13:38:00,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +12: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +31: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +31: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +31: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +31: [2023-05-25 13:38:00,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +13: [2023-05-25 13:38:00,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +12: [2023-05-25 13:38:00,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +31: [2023-05-25 13:38:00,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +13: [2023-05-25 13:38:00,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +30: [2023-05-25 13:38:00,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +13: [2023-05-25 13:38:00,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +30: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +31: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +30: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +30: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +30: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +30: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +29: [2023-05-25 13:38:00,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +29: [2023-05-25 13:38:00,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +29: [2023-05-25 13:38:00,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +29: [2023-05-25 13:38:00,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... + 6: [2023-05-25 13:38:00,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +16: [2023-05-25 13:38:01,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +28: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +16: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:01,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:01,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:01,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +28: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +27: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +27: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +28: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +29: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +27: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +27: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +28: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +27: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +13: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +29: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +28: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +29: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +17: [2023-05-25 13:38:01,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +17: [2023-05-25 13:38:01,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:01,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:01,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +17: [2023-05-25 13:38:01,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +17: [2023-05-25 13:38:01,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +17: [2023-05-25 13:38:01,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +17: [2023-05-25 13:38:01,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +28: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +13: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +13: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +24: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +26: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +13: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +26: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +24: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +24: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +26: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +26: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +26: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +23: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +24: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +24: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +24: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +24: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +26: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +26: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +26: [2023-05-25 13:38:01,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +26: [2023-05-25 13:38:01,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +26: [2023-05-25 13:38:01,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +28: [2023-05-25 13:38:01,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:01,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:01,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +25: [2023-05-25 13:38:01,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +30: [2023-05-25 13:38:01,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +16: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +31: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:01,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +20: [2023-05-25 13:38:01,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +30: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +30: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +20: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +31: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +20: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +20: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +20: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +20: [2023-05-25 13:38:01,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +20: [2023-05-25 13:38:01,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +20: [2023-05-25 13:38:01,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +22: [2023-05-25 13:38:01,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +18: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +22: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +29: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +29: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:01,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:01,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +22: [2023-05-25 13:38:01,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +27: [2023-05-25 13:38:01,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +22: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +16: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +22: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +22: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +22: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +22: [2023-05-25 13:38:01,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +17: [2023-05-25 13:38:01,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +30: [2023-05-25 13:38:01,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +17: [2023-05-25 13:38:01,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +31: [2023-05-25 13:38:01,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:01,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +16: [2023-05-25 13:38:01,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +23: [2023-05-25 13:38:01,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +31: [2023-05-25 13:38:01,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +31: [2023-05-25 13:38:01,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +26: [2023-05-25 13:38:01,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +17: [2023-05-25 13:38:01,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +27: [2023-05-25 13:38:01,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +23: [2023-05-25 13:38:01,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +23: [2023-05-25 13:38:01,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:01,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +27: [2023-05-25 13:38:01,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +18: [2023-05-25 13:38:01,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +26: [2023-05-25 13:38:01,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +24: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +24: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +20: [2023-05-25 13:38:01,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:01,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +25: [2023-05-25 13:38:01,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +14: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +14: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +19: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +19: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +19: [2023-05-25 13:38:01,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +19: [2023-05-25 13:38:01,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +19: [2023-05-25 13:38:01,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +19: [2023-05-25 13:38:01,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +14: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +21: [2023-05-25 13:38:01,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +21: [2023-05-25 13:38:01,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +11: [2023-05-25 13:38:01,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +11: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +15: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +19: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +15: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:01,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:01,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +19: [2023-05-25 13:38:01,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:01,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +12: [2023-05-25 13:38:01,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +12: [2023-05-25 13:38:01,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +21: [2023-05-25 13:38:01,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +21: [2023-05-25 13:38:01,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +13: [2023-05-25 13:38:01,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +13: [2023-05-25 13:38:01,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +15: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +12: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +15: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +15: [2023-05-25 13:38:01,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +15: [2023-05-25 13:38:01,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +11: [2023-05-25 13:38:01,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +11: [2023-05-25 13:38:01,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +13: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:01,194] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +11: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +13: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +11: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +12: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +14: [2023-05-25 13:38:01,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:01,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +11: [2023-05-25 13:38:01,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +14: [2023-05-25 13:38:01,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +14: [2023-05-25 13:38:01,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +14: [2023-05-25 13:38:01,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +14: [2023-05-25 13:38:01,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:01,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +12: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +10: [2023-05-25 13:38:01,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +10: [2023-05-25 13:38:01,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +15: [2023-05-25 13:38:01,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +11: [2023-05-25 13:38:01,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:01,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +15: [2023-05-25 13:38:01,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +12: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +12: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +11: [2023-05-25 13:38:01,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +10: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +14: [2023-05-25 13:38:01,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +10: [2023-05-25 13:38:01,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +10: [2023-05-25 13:38:01,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +14: [2023-05-25 13:38:01,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +10: [2023-05-25 13:38:01,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +15: [2023-05-25 13:38:01,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 2: [2023-05-25 13:38:01,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +13: [2023-05-25 13:38:01,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +13: [2023-05-25 13:38:01,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. + 0: [2023-05-25 13:38:01,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +15: [2023-05-25 13:38:01,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +28: [2023-05-25 13:38:01,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +28: [2023-05-25 13:38:01,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +10: [2023-05-25 13:38:01,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:01,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:01,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +17: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +17: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +13: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +28: [2023-05-25 13:38:01,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +12: [2023-05-25 13:38:01,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +12: [2023-05-25 13:38:01,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +13: [2023-05-25 13:38:01,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:01,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +28: [2023-05-25 13:38:01,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +16: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +14: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +11: [2023-05-25 13:38:01,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +16: [2023-05-25 13:38:01,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +11: [2023-05-25 13:38:01,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +13: [2023-05-25 13:38:01,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +12: [2023-05-25 13:38:01,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +13: [2023-05-25 13:38:01,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:01,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +17: [2023-05-25 13:38:01,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +17: [2023-05-25 13:38:01,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +11: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +12: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +14: [2023-05-25 13:38:01,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +15: [2023-05-25 13:38:01,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +14: [2023-05-25 13:38:01,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +11: [2023-05-25 13:38:01,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +11: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +14: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +16: [2023-05-25 13:38:01,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +16: [2023-05-25 13:38:01,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +11: [2023-05-25 13:38:01,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... + 8: [2023-05-25 13:38:01,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +14: [2023-05-25 13:38:01,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +12: [2023-05-25 13:38:01,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +14: [2023-05-25 13:38:01,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +13: [2023-05-25 13:38:01,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +12: [2023-05-25 13:38:01,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +15: [2023-05-25 13:38:01,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +29: [2023-05-25 13:38:01,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +10: [2023-05-25 13:38:01,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:01,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +29: [2023-05-25 13:38:01,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +10: [2023-05-25 13:38:01,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +12: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +12: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +29: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +13: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +28: [2023-05-25 13:38:01,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +28: [2023-05-25 13:38:01,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +15: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 9: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 9: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:01,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +15: [2023-05-25 13:38:01,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +13: [2023-05-25 13:38:01,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +26: [2023-05-25 13:38:01,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +13: [2023-05-25 13:38:01,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +26: [2023-05-25 13:38:01,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +10: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +16: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +16: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +17: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +17: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +23: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +23: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +26: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +10: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +10: [2023-05-25 13:38:01,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +13: [2023-05-25 13:38:01,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:01,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +13: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +10: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +13: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +10: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +10: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +26: [2023-05-25 13:38:01,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +31: [2023-05-25 13:38:01,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 2: [2023-05-25 13:38:01,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +30: [2023-05-25 13:38:01,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +30: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +15: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +15: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +30: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +30: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +29: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +30: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +30: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +29: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +18: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +18: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +23: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +30: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +23: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +26: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +26: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +23: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +23: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +25: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +23: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +30: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +16: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +16: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +31: [2023-05-25 13:38:01,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +31: [2023-05-25 13:38:01,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +14: [2023-05-25 13:38:01,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +31: [2023-05-25 13:38:01,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +31: [2023-05-25 13:38:01,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +14: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +14: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +14: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +22: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +31: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +20: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +20: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +29: [2023-05-25 13:38:01,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:01,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:01,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:01,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +23: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +10: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +18: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +31: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:01,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +23: [2023-05-25 13:38:01,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +26: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +29: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +30: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +27: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +25: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +15: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 0: [2023-05-25 13:38:01,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +15: [2023-05-25 13:38:01,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +13: [2023-05-25 13:38:01,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +31: [2023-05-25 13:38:01,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:01,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +13: [2023-05-25 13:38:01,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +31: [2023-05-25 13:38:01,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:01,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:01,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +31: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +24: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +28: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +13: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +26: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +31: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +13: [2023-05-25 13:38:01,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +31: [2023-05-25 13:38:01,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +28: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +28: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +18: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +31: [2023-05-25 13:38:01,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +31: [2023-05-25 13:38:01,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +24: [2023-05-25 13:38:01,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +29: [2023-05-25 13:38:01,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +20: [2023-05-25 13:38:01,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +20: [2023-05-25 13:38:01,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +27: [2023-05-25 13:38:01,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +27: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +19: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +19: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +29: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +25: [2023-05-25 13:38:01,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +25: [2023-05-25 13:38:01,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +22: [2023-05-25 13:38:01,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +29: [2023-05-25 13:38:01,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:01,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +28: [2023-05-25 13:38:01,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +10: [2023-05-25 13:38:01,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +29: [2023-05-25 13:38:01,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +30: [2023-05-25 13:38:01,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +21: [2023-05-25 13:38:01,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +27: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +21: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +22: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +17: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +24: [2023-05-25 13:38:01,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +17: [2023-05-25 13:38:01,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +27: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +18: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +27: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 4: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +19: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +24: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +24: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +24: [2023-05-25 13:38:01,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +19: [2023-05-25 13:38:01,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +15: [2023-05-25 13:38:01,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +29: [2023-05-25 13:38:01,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:01,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +25: [2023-05-25 13:38:01,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:01,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:01,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:01,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +17: [2023-05-25 13:38:01,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +15: [2023-05-25 13:38:01,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +29: [2023-05-25 13:38:01,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +29: [2023-05-25 13:38:01,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +16: [2023-05-25 13:38:01,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +28: [2023-05-25 13:38:01,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +16: [2023-05-25 13:38:01,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +28: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +17: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +27: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +24: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +24: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +21: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +25: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +21: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +26: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +26: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +15: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +30: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 0: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +19: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +26: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +31: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +19: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +31: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +26: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +15: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +29: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +29: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +28: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +24: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +24: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +26: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +31: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +31: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +28: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +18: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +18: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. + 2: [2023-05-25 13:38:01,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +30: [2023-05-25 13:38:01,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:01,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +16: [2023-05-25 13:38:01,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +18: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +31: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +16: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +31: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +18: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +29: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +20: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +20: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +26: [2023-05-25 13:38:01,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +26: [2023-05-25 13:38:01,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +26: [2023-05-25 13:38:01,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +30: [2023-05-25 13:38:01,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +30: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +18: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +24: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +18: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +20: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +24: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +20: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +18: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +29: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +31: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +29: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +20: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +31: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +31: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +17: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +30: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +17: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +30: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +22: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +22: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +22: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +29: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +17: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +17: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +30: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +30: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +28: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +17: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +24: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +17: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +22: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +31: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +20: [2023-05-25 13:38:01,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +28: [2023-05-25 13:38:01,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +20: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +24: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +22: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +18: [2023-05-25 13:38:01,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +20: [2023-05-25 13:38:01,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +20: [2023-05-25 13:38:01,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +18: [2023-05-25 13:38:01,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +18: [2023-05-25 13:38:01,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... +28: [2023-05-25 13:38:01,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +26: [2023-05-25 13:38:01,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:01,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +31: [2023-05-25 13:38:01,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +20: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +24: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +22: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +27: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +27: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... +28: [2023-05-25 13:38:01,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +20: [2023-05-25 13:38:01,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +24: [2023-05-25 13:38:01,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +17: [2023-05-25 13:38:01,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +24: [2023-05-25 13:38:01,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +17: [2023-05-25 13:38:01,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +30: [2023-05-25 13:38:01,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +22: [2023-05-25 13:38:01,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +27: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +27: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... +24: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... + 4: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +30: [2023-05-25 13:38:01,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +19: [2023-05-25 13:38:01,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +19: [2023-05-25 13:38:01,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +22: [2023-05-25 13:38:01,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +19: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +19: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +19: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +25: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +25: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +25: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +25: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +27: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +27: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +27: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +23: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +24: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +20: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +24: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +18: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +29: [2023-05-25 13:38:01,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +20: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +20: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +17: [2023-05-25 13:38:01,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +20: [2023-05-25 13:38:01,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:01,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +21: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +29: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +21: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +25: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +18: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +27: [2023-05-25 13:38:01,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:01,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +24: [2023-05-25 13:38:01,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +29: [2023-05-25 13:38:01,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +22: [2023-05-25 13:38:01,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +25: [2023-05-25 13:38:01,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +25: [2023-05-25 13:38:01,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +24: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +24: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +22: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +22: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +29: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +28: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +22: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +24: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +20: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +18: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +24: [2023-05-25 13:38:01,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +22: [2023-05-25 13:38:01,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +18: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +22: [2023-05-25 13:38:01,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +26: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... +17: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +17: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +27: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +20: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +27: [2023-05-25 13:38:01,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +20: [2023-05-25 13:38:01,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +18: [2023-05-25 13:38:01,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:01,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +28: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +16: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +20: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +16: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +20: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... +20: [2023-05-25 13:38:01,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:01,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +28: [2023-05-25 13:38:01,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +26: [2023-05-25 13:38:01,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +26: [2023-05-25 13:38:01,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +21: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +22: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:01,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +11: [2023-05-25 13:38:01,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +19: [2023-05-25 13:38:01,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +25: [2023-05-25 13:38:01,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +11: [2023-05-25 13:38:01,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +19: [2023-05-25 13:38:01,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:01,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +11: [2023-05-25 13:38:01,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +23: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +21: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +25: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +22: [2023-05-25 13:38:01,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +22: [2023-05-25 13:38:01,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +19: [2023-05-25 13:38:01,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +19: [2023-05-25 13:38:01,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +14: [2023-05-25 13:38:01,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +22: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +27: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +20: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +25: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +14: [2023-05-25 13:38:01,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +22: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +20: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +27: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +20: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +27: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +21: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +13: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +21: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +27: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +20: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +21: [2023-05-25 13:38:01,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +13: [2023-05-25 13:38:01,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:01,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:01,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +13: [2023-05-25 13:38:01,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +19: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:01,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +16: [2023-05-25 13:38:01,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +12: [2023-05-25 13:38:01,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:01,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +12: [2023-05-25 13:38:01,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:01,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:01,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +21: [2023-05-25 13:38:01,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +21: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +19: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +19: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +19: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +21: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +13: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:01,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +12: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:01,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +21: [2023-05-25 13:38:01,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +21: [2023-05-25 13:38:01,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +11: [2023-05-25 13:38:01,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +11: [2023-05-25 13:38:01,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +11: [2023-05-25 13:38:01,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +12: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +13: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +13: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +12: [2023-05-25 13:38:01,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +14: [2023-05-25 13:38:01,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +14: [2023-05-25 13:38:01,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +10: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +15: [2023-05-25 13:38:01,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +15: [2023-05-25 13:38:01,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +13: [2023-05-25 13:38:01,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:01,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +10: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +12: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +14: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +14: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +28: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +28: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +13: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +11: [2023-05-25 13:38:01,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +11: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +15: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +26: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +26: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +12: [2023-05-25 13:38:01,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:01,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +12: [2023-05-25 13:38:01,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +12: [2023-05-25 13:38:01,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +11: [2023-05-25 13:38:01,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:01,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +11: [2023-05-25 13:38:01,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +11: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +10: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +10: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +28: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +10: [2023-05-25 13:38:01,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +26: [2023-05-25 13:38:01,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:01,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 1: [2023-05-25 13:38:01,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +13: [2023-05-25 13:38:01,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +27: [2023-05-25 13:38:01,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +14: [2023-05-25 13:38:01,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +13: [2023-05-25 13:38:01,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +27: [2023-05-25 13:38:01,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +14: [2023-05-25 13:38:01,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +14: [2023-05-25 13:38:01,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +10: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +10: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +10: [2023-05-25 13:38:01,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +10: [2023-05-25 13:38:01,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +24: [2023-05-25 13:38:01,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +10: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +24: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +31: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +15: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +31: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +12: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +15: [2023-05-25 13:38:01,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +15: [2023-05-25 13:38:01,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:01,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +15: [2023-05-25 13:38:01,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +27: [2023-05-25 13:38:01,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +27: [2023-05-25 13:38:01,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +12: [2023-05-25 13:38:01,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,573] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +26: [2023-05-25 13:38:01,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +12: [2023-05-25 13:38:01,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +26: [2023-05-25 13:38:01,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +13: [2023-05-25 13:38:01,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,575] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +31: [2023-05-25 13:38:01,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +13: [2023-05-25 13:38:01,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +13: [2023-05-25 13:38:01,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +19: [2023-05-25 13:38:01,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +29: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +29: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +24: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +24: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +19: [2023-05-25 13:38:01,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +31: [2023-05-25 13:38:01,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +15: [2023-05-25 13:38:01,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +15: [2023-05-25 13:38:01,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +10: [2023-05-25 13:38:01,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +26: [2023-05-25 13:38:01,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +29: [2023-05-25 13:38:01,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +10: [2023-05-25 13:38:01,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +10: [2023-05-25 13:38:01,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +29: [2023-05-25 13:38:01,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +29: [2023-05-25 13:38:01,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:01,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:01,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +25: [2023-05-25 13:38:01,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +23: [2023-05-25 13:38:01,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +29: [2023-05-25 13:38:01,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:01,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +19: [2023-05-25 13:38:01,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +30: [2023-05-25 13:38:01,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +17: [2023-05-25 13:38:01,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +17: [2023-05-25 13:38:01,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +30: [2023-05-25 13:38:01,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +31: [2023-05-25 13:38:01,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +31: [2023-05-25 13:38:01,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +22: [2023-05-25 13:38:01,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +22: [2023-05-25 13:38:01,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +30: [2023-05-25 13:38:01,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +30: [2023-05-25 13:38:01,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +15: [2023-05-25 13:38:01,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:01,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +24: [2023-05-25 13:38:01,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +24: [2023-05-25 13:38:01,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +29: [2023-05-25 13:38:01,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:01,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +29: [2023-05-25 13:38:01,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:01,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +18: [2023-05-25 13:38:01,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +23: [2023-05-25 13:38:01,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +25: [2023-05-25 13:38:01,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:01,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +23: [2023-05-25 13:38:01,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +18: [2023-05-25 13:38:01,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +17: [2023-05-25 13:38:01,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +17: [2023-05-25 13:38:01,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +25: [2023-05-25 13:38:01,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +25: [2023-05-25 13:38:01,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +30: [2023-05-25 13:38:01,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:01,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:01,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +22: [2023-05-25 13:38:01,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:01,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:01,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +21: [2023-05-25 13:38:01,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +21: [2023-05-25 13:38:01,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +18: [2023-05-25 13:38:01,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:01,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +18: [2023-05-25 13:38:01,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +13: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +13: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +14: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +14: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +12: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +11: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +15: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +16: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +11: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +12: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +15: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +20: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +10: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +20: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +10: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +25: [2023-05-25 13:38:01,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:01,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:01,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +15: [2023-05-25 13:38:01,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:01,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +30: [2023-05-25 13:38:01,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +30: [2023-05-25 13:38:01,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +15: [2023-05-25 13:38:01,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +21: [2023-05-25 13:38:01,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +15: [2023-05-25 13:38:01,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. +16: [2023-05-25 13:38:01,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +15: [2023-05-25 13:38:01,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +11: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +16: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +14: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:01,639] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +20: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +20: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +27: [2023-05-25 13:38:01,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +27: [2023-05-25 13:38:01,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +30: [2023-05-25 13:38:01,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +20: [2023-05-25 13:38:01,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +20: [2023-05-25 13:38:01,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +28: [2023-05-25 13:38:01,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +28: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +28: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... +27: [2023-05-25 13:38:01,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:01,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +28: [2023-05-25 13:38:01,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. +14: [2023-05-25 13:38:01,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +11: [2023-05-25 13:38:01,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 0: [2023-05-25 13:38:01,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +23: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +23: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +20: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +28: [2023-05-25 13:38:01,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +11: [2023-05-25 13:38:01,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +20: [2023-05-25 13:38:01,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +12: [2023-05-25 13:38:01,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +28: [2023-05-25 13:38:01,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +14: [2023-05-25 13:38:01,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +12: [2023-05-25 13:38:01,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +12: [2023-05-25 13:38:01,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +28: [2023-05-25 13:38:01,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +12: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +10: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +23: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +15: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +13: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +13: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +18: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +15: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +18: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +23: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:01,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:01,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +23: [2023-05-25 13:38:01,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +23: [2023-05-25 13:38:01,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +26: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +26: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +26: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +26: [2023-05-25 13:38:01,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +10: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +19: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +19: [2023-05-25 13:38:01,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +17: [2023-05-25 13:38:01,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +17: [2023-05-25 13:38:01,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... + 9: [2023-05-25 13:38:01,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +18: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +28: [2023-05-25 13:38:01,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +18: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... +16: [2023-05-25 13:38:01,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +16: [2023-05-25 13:38:01,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +19: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +19: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +29: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +29: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +22: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +22: [2023-05-25 13:38:01,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +23: [2023-05-25 13:38:01,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. + 1: [2023-05-25 13:38:01,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:01,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +17: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +23: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +17: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +29: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... +29: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +16: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... +23: [2023-05-25 13:38:01,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +23: [2023-05-25 13:38:01,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +26: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +28: [2023-05-25 13:38:01,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +21: [2023-05-25 13:38:01,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +22: [2023-05-25 13:38:01,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +19: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +19: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +23: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +19: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +19: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +19: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +19: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +19: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +22: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +19: [2023-05-25 13:38:01,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +26: [2023-05-25 13:38:01,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +22: [2023-05-25 13:38:01,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +26: [2023-05-25 13:38:01,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +22: [2023-05-25 13:38:01,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +22: [2023-05-25 13:38:01,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +22: [2023-05-25 13:38:01,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +16: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +16: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +20: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +30: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +30: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +18: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +18: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +18: [2023-05-25 13:38:01,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +20: [2023-05-25 13:38:01,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +16: [2023-05-25 13:38:01,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +16: [2023-05-25 13:38:01,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +21: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +20: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +21: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +20: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +17: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +17: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +17: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +19: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +17: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... + 3: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +18: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +21: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +21: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +18: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +21: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +17: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +18: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +18: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +18: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +22: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +17: [2023-05-25 13:38:01,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +22: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. + 5: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +16: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +29: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +24: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +30: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +17: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +16: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +30: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +29: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +23: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +21: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +16: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +21: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +20: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +26: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +26: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +29: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +23: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +29: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +29: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +22: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +29: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +19: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +18: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +23: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +31: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +31: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +31: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +19: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +31: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +24: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +22: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +18: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +27: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +24: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +24: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +27: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +24: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +31: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +31: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +16: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +16: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +27: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +27: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +27: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +25: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +25: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +25: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +25: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +25: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +22: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +25: [2023-05-25 13:38:01,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +25: [2023-05-25 13:38:01,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +25: [2023-05-25 13:38:01,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:01,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +22: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +24: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +22: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +22: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +26: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +17: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +21: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +26: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +20: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +21: [2023-05-25 13:38:01,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +19: [2023-05-25 13:38:01,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +31: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +22: [2023-05-25 13:38:01,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +16: [2023-05-25 13:38:01,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +20: [2023-05-25 13:38:01,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +31: [2023-05-25 13:38:01,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +16: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +21: [2023-05-25 13:38:01,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +28: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +21: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +28: [2023-05-25 13:38:01,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +17: [2023-05-25 13:38:01,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +25: [2023-05-25 13:38:01,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +20: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +29: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +25: [2023-05-25 13:38:01,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +20: [2023-05-25 13:38:01,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +27: [2023-05-25 13:38:01,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +16: [2023-05-25 13:38:01,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +25: [2023-05-25 13:38:01,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +20: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +21: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +20: [2023-05-25 13:38:01,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +27: [2023-05-25 13:38:01,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +21: [2023-05-25 13:38:01,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +31: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +21: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +18: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +31: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +20: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +17: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:01,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 4: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +12: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +24: [2023-05-25 13:38:01,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +11: [2023-05-25 13:38:01,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +29: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +21: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +11: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +22: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +24: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +12: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +18: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +25: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +24: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +26: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +17: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +18: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +21: [2023-05-25 13:38:01,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +26: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +26: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +14: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +31: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +14: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +27: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +22: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +22: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +27: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +13: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +16: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +31: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +13: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +24: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:01,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:01,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:01,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +31: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +15: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +16: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +16: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +13: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +31: [2023-05-25 13:38:01,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +28: [2023-05-25 13:38:01,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +15: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +25: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +16: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +15: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +28: [2023-05-25 13:38:01,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +28: [2023-05-25 13:38:01,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +19: [2023-05-25 13:38:01,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +25: [2023-05-25 13:38:01,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +19: [2023-05-25 13:38:01,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +25: [2023-05-25 13:38:01,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +27: [2023-05-25 13:38:01,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +27: [2023-05-25 13:38:01,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +25: [2023-05-25 13:38:01,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +17: [2023-05-25 13:38:01,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +25: [2023-05-25 13:38:01,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +17: [2023-05-25 13:38:01,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +11: [2023-05-25 13:38:01,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +17: [2023-05-25 13:38:01,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +11: [2023-05-25 13:38:01,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +19: [2023-05-25 13:38:01,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +10: [2023-05-25 13:38:01,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +19: [2023-05-25 13:38:01,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +17: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +13: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +27: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +13: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +11: [2023-05-25 13:38:01,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +11: [2023-05-25 13:38:01,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +27: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +14: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +12: [2023-05-25 13:38:01,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +12: [2023-05-25 13:38:01,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +14: [2023-05-25 13:38:01,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:01,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +19: [2023-05-25 13:38:01,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +12: [2023-05-25 13:38:01,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:01,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +12: [2023-05-25 13:38:01,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:01,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +10: [2023-05-25 13:38:01,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +10: [2023-05-25 13:38:01,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +19: [2023-05-25 13:38:01,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +15: [2023-05-25 13:38:01,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +27: [2023-05-25 13:38:01,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +15: [2023-05-25 13:38:01,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +27: [2023-05-25 13:38:01,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +27: [2023-05-25 13:38:01,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +13: [2023-05-25 13:38:01,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +27: [2023-05-25 13:38:01,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +11: [2023-05-25 13:38:01,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,866] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +14: [2023-05-25 13:38:01,866] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +10: [2023-05-25 13:38:01,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +11: [2023-05-25 13:38:01,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +13: [2023-05-25 13:38:01,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +13: [2023-05-25 13:38:01,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +15: [2023-05-25 13:38:01,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +15: [2023-05-25 13:38:01,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +13: [2023-05-25 13:38:01,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +13: [2023-05-25 13:38:01,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +10: [2023-05-25 13:38:01,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:01,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +12: [2023-05-25 13:38:01,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +14: [2023-05-25 13:38:01,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +14: [2023-05-25 13:38:01,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +15: [2023-05-25 13:38:01,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +12: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +14: [2023-05-25 13:38:01,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +11: [2023-05-25 13:38:01,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +15: [2023-05-25 13:38:01,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +15: [2023-05-25 13:38:01,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +10: [2023-05-25 13:38:01,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +10: [2023-05-25 13:38:01,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +14: [2023-05-25 13:38:01,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +15: [2023-05-25 13:38:01,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +14: [2023-05-25 13:38:01,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +12: [2023-05-25 13:38:01,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +14: [2023-05-25 13:38:01,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +11: [2023-05-25 13:38:01,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +10: [2023-05-25 13:38:01,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +10: [2023-05-25 13:38:01,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +15: [2023-05-25 13:38:01,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +14: [2023-05-25 13:38:01,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +12: [2023-05-25 13:38:01,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +14: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +15: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +12: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +12: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +10: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +12: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +10: [2023-05-25 13:38:01,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +10: [2023-05-25 13:38:01,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +12: [2023-05-25 13:38:01,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +12: [2023-05-25 13:38:01,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +23: [2023-05-25 13:38:01,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +23: [2023-05-25 13:38:01,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +26: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +30: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +26: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +28: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +28: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +25: [2023-05-25 13:38:01,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +29: [2023-05-25 13:38:01,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +25: [2023-05-25 13:38:01,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +30: [2023-05-25 13:38:01,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +29: [2023-05-25 13:38:01,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +22: [2023-05-25 13:38:01,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +22: [2023-05-25 13:38:01,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +11: [2023-05-25 13:38:01,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,948] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +26: [2023-05-25 13:38:01,948] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +26: [2023-05-25 13:38:01,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +11: [2023-05-25 13:38:01,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +11: [2023-05-25 13:38:01,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +23: [2023-05-25 13:38:01,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:01,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 5: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +29: [2023-05-25 13:38:01,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:01,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +26: [2023-05-25 13:38:01,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +26: [2023-05-25 13:38:01,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +30: [2023-05-25 13:38:01,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +22: [2023-05-25 13:38:01,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +10: [2023-05-25 13:38:01,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +25: [2023-05-25 13:38:01,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +15: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +25: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +22: [2023-05-25 13:38:01,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +26: [2023-05-25 13:38:01,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:01,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +15: [2023-05-25 13:38:01,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +15: [2023-05-25 13:38:01,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +15: [2023-05-25 13:38:01,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +12: [2023-05-25 13:38:01,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +26: [2023-05-25 13:38:01,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +10: [2023-05-25 13:38:01,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +15: [2023-05-25 13:38:01,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +15: [2023-05-25 13:38:01,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +10: [2023-05-25 13:38:01,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +12: [2023-05-25 13:38:01,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +24: [2023-05-25 13:38:01,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +15: [2023-05-25 13:38:01,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:01,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +22: [2023-05-25 13:38:01,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +22: [2023-05-25 13:38:01,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +29: [2023-05-25 13:38:01,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +29: [2023-05-25 13:38:01,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +12: [2023-05-25 13:38:01,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +17: [2023-05-25 13:38:01,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +12: [2023-05-25 13:38:01,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +17: [2023-05-25 13:38:01,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +27: [2023-05-25 13:38:01,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +27: [2023-05-25 13:38:01,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +20: [2023-05-25 13:38:01,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +20: [2023-05-25 13:38:01,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +10: [2023-05-25 13:38:01,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +13: [2023-05-25 13:38:01,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +21: [2023-05-25 13:38:01,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +14: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +23: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +23: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +21: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +14: [2023-05-25 13:38:01,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 0: [2023-05-25 13:38:01,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +11: [2023-05-25 13:38:01,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +11: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +18: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +24: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +18: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +29: [2023-05-25 13:38:01,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +24: [2023-05-25 13:38:01,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +22: [2023-05-25 13:38:01,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:01,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +29: [2023-05-25 13:38:01,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +19: [2023-05-25 13:38:01,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +10: [2023-05-25 13:38:01,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +19: [2023-05-25 13:38:01,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +27: [2023-05-25 13:38:01,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +17: [2023-05-25 13:38:01,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +10: [2023-05-25 13:38:01,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +17: [2023-05-25 13:38:01,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +31: [2023-05-25 13:38:01,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +30: [2023-05-25 13:38:01,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +30: [2023-05-25 13:38:01,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +31: [2023-05-25 13:38:01,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +12: [2023-05-25 13:38:01,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +12: [2023-05-25 13:38:01,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +23: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +10: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +27: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +31: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +31: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +14: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +21: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +23: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +13: [2023-05-25 13:38:01,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +21: [2023-05-25 13:38:01,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:01,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:01,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +14: [2023-05-25 13:38:01,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +17: [2023-05-25 13:38:01,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +11: [2023-05-25 13:38:01,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +17: [2023-05-25 13:38:01,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +28: [2023-05-25 13:38:01,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +28: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. + 2: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 2: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +18: [2023-05-25 13:38:02,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +18: [2023-05-25 13:38:02,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +18: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +19: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +16: [2023-05-25 13:38:02,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +16: [2023-05-25 13:38:02,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +19: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +19: [2023-05-25 13:38:02,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +19: [2023-05-25 13:38:02,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +30: [2023-05-25 13:38:02,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +31: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +22: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +12: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +22: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +26: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +26: [2023-05-25 13:38:02,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +23: [2023-05-25 13:38:02,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +23: [2023-05-25 13:38:02,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +24: [2023-05-25 13:38:02,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +16: [2023-05-25 13:38:02,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +24: [2023-05-25 13:38:02,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +31: [2023-05-25 13:38:02,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +16: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +18: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +18: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +17: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +16: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +28: [2023-05-25 13:38:02,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +17: [2023-05-25 13:38:02,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +16: [2023-05-25 13:38:02,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +28: [2023-05-25 13:38:02,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +16: [2023-05-25 13:38:02,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:02,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +21: [2023-05-25 13:38:02,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +16: [2023-05-25 13:38:02,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:02,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +19: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +30: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +19: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +30: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +18: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +20: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +23: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +26: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +26: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +22: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +14: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +21: [2023-05-25 13:38:02,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +21: [2023-05-25 13:38:02,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +24: [2023-05-25 13:38:02,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +27: [2023-05-25 13:38:02,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +27: [2023-05-25 13:38:02,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +16: [2023-05-25 13:38:02,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +14: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 7: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +15: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +27: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +16: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +27: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +13: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +14: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +16: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +15: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +16: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +28: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +28: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +11: [2023-05-25 13:38:02,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +18: [2023-05-25 13:38:02,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +29: [2023-05-25 13:38:02,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +21: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +13: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +18: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +13: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +11: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +29: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +28: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +29: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +28: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +28: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +30: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +28: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +27: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +27: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +28: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +21: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +30: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:02,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +29: [2023-05-25 13:38:02,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +29: [2023-05-25 13:38:02,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +29: [2023-05-25 13:38:02,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +19: [2023-05-25 13:38:02,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +19: [2023-05-25 13:38:02,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +28: [2023-05-25 13:38:02,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +27: [2023-05-25 13:38:02,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +27: [2023-05-25 13:38:02,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +28: [2023-05-25 13:38:02,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +21: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +25: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +12: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +21: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +25: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +30: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +25: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +12: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +25: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +25: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +12: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +30: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +20: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +26: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +26: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +12: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +24: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +26: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +26: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +26: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +20: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +20: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +26: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +26: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +26: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +20: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +24: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +24: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +19: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +19: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +28: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +28: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +31: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +31: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +28: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +28: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +31: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +31: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +31: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +28: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +25: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +25: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +28: [2023-05-25 13:38:02,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +14: [2023-05-25 13:38:02,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +14: [2023-05-25 13:38:02,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +24: [2023-05-25 13:38:02,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +14: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +14: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +26: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +24: [2023-05-25 13:38:02,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +27: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +20: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +29: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +26: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +26: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +27: [2023-05-25 13:38:02,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +20: [2023-05-25 13:38:02,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +15: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:02,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +30: [2023-05-25 13:38:02,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +27: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +27: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +13: [2023-05-25 13:38:02,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +28: [2023-05-25 13:38:02,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +13: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +27: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +27: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +12: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +25: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +13: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +30: [2023-05-25 13:38:02,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +25: [2023-05-25 13:38:02,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +31: [2023-05-25 13:38:02,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +12: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +30: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +31: [2023-05-25 13:38:02,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:02,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +17: [2023-05-25 13:38:02,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +29: [2023-05-25 13:38:02,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +17: [2023-05-25 13:38:02,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +15: [2023-05-25 13:38:02,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +27: [2023-05-25 13:38:02,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +15: [2023-05-25 13:38:02,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +24: [2023-05-25 13:38:02,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +28: [2023-05-25 13:38:02,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +26: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +27: [2023-05-25 13:38:02,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +28: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +26: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +27: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +28: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +25: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +30: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +27: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +25: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +30: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +29: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +24: [2023-05-25 13:38:02,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +17: [2023-05-25 13:38:02,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +24: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +26: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +26: [2023-05-25 13:38:02,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +14: [2023-05-25 13:38:02,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +10: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +17: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +28: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +10: [2023-05-25 13:38:02,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:02,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +25: [2023-05-25 13:38:02,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +28: [2023-05-25 13:38:02,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +25: [2023-05-25 13:38:02,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +12: [2023-05-25 13:38:02,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +25: [2023-05-25 13:38:02,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +11: [2023-05-25 13:38:02,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +25: [2023-05-25 13:38:02,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +14: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +14: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +13: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +29: [2023-05-25 13:38:02,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +29: [2023-05-25 13:38:02,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +27: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +14: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +14: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +29: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +29: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +31: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +31: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +27: [2023-05-25 13:38:02,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +27: [2023-05-25 13:38:02,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +27: [2023-05-25 13:38:02,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +11: [2023-05-25 13:38:02,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +12: [2023-05-25 13:38:02,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +27: [2023-05-25 13:38:02,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +11: [2023-05-25 13:38:02,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +27: [2023-05-25 13:38:02,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +27: [2023-05-25 13:38:02,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +27: [2023-05-25 13:38:02,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:02,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +15: [2023-05-25 13:38:02,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +15: [2023-05-25 13:38:02,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +13: [2023-05-25 13:38:02,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +25: [2023-05-25 13:38:02,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +25: [2023-05-25 13:38:02,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +13: [2023-05-25 13:38:02,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +10: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +14: [2023-05-25 13:38:02,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +24: [2023-05-25 13:38:02,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +31: [2023-05-25 13:38:02,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:02,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +24: [2023-05-25 13:38:02,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +15: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:02,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:02,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +25: [2023-05-25 13:38:02,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +25: [2023-05-25 13:38:02,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +10: [2023-05-25 13:38:02,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +14: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +19: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +19: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +19: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +19: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +19: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +14: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +22: [2023-05-25 13:38:02,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:02,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +22: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +10: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +22: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +22: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +22: [2023-05-25 13:38:02,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +22: [2023-05-25 13:38:02,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +22: [2023-05-25 13:38:02,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +14: [2023-05-25 13:38:02,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +31: [2023-05-25 13:38:02,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +31: [2023-05-25 13:38:02,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +15: [2023-05-25 13:38:02,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +12: [2023-05-25 13:38:02,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +31: [2023-05-25 13:38:02,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +15: [2023-05-25 13:38:02,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:02,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +15: [2023-05-25 13:38:02,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +11: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +10: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +11: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +10: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +25: [2023-05-25 13:38:02,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +25: [2023-05-25 13:38:02,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +19: [2023-05-25 13:38:02,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +13: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +13: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +23: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +17: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +11: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +16: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +16: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +22: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +10: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +17: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +19: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +10: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +18: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +16: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +17: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +17: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +16: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +16: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +17: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +17: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +17: [2023-05-25 13:38:02,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +18: [2023-05-25 13:38:02,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +18: [2023-05-25 13:38:02,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +19: [2023-05-25 13:38:02,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +22: [2023-05-25 13:38:02,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +13: [2023-05-25 13:38:02,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:02,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:02,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +11: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +15: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +15: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +10: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +12: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +12: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +11: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +11: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +22: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +14: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +14: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +13: [2023-05-25 13:38:02,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +13: [2023-05-25 13:38:02,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +12: [2023-05-25 13:38:02,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +14: [2023-05-25 13:38:02,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +16: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +21: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +10: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +12: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +11: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +16: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +10: [2023-05-25 13:38:02,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +10: [2023-05-25 13:38:02,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +21: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +18: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +17: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +13: [2023-05-25 13:38:02,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +18: [2023-05-25 13:38:02,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +13: [2023-05-25 13:38:02,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:02,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +16: [2023-05-25 13:38:02,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +10: [2023-05-25 13:38:02,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +21: [2023-05-25 13:38:02,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:02,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +13: [2023-05-25 13:38:02,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +21: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +13: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +17: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:02,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:02,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:02,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +27: [2023-05-25 13:38:02,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +27: [2023-05-25 13:38:02,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +13: [2023-05-25 13:38:02,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +13: [2023-05-25 13:38:02,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +15: [2023-05-25 13:38:02,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 6: [2023-05-25 13:38:02,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +15: [2023-05-25 13:38:02,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,246] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +14: [2023-05-25 13:38:02,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +15: [2023-05-25 13:38:02,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +12: [2023-05-25 13:38:02,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:02,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +12: [2023-05-25 13:38:02,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +12: [2023-05-25 13:38:02,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... + 8: [2023-05-25 13:38:02,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +13: [2023-05-25 13:38:02,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +27: [2023-05-25 13:38:02,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +11: [2023-05-25 13:38:02,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +27: [2023-05-25 13:38:02,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +11: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +20: [2023-05-25 13:38:02,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +13: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +20: [2023-05-25 13:38:02,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:02,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +20: [2023-05-25 13:38:02,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +20: [2023-05-25 13:38:02,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +20: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +20: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +20: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:02,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +13: [2023-05-25 13:38:02,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. +31: [2023-05-25 13:38:02,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +31: [2023-05-25 13:38:02,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +26: [2023-05-25 13:38:02,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +26: [2023-05-25 13:38:02,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +31: [2023-05-25 13:38:02,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +20: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 6: [2023-05-25 13:38:02,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +20: [2023-05-25 13:38:02,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +26: [2023-05-25 13:38:02,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +25: [2023-05-25 13:38:02,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +20: [2023-05-25 13:38:02,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +20: [2023-05-25 13:38:02,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +10: [2023-05-25 13:38:02,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +10: [2023-05-25 13:38:02,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +25: [2023-05-25 13:38:02,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:02,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +29: [2023-05-25 13:38:02,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +30: [2023-05-25 13:38:02,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +30: [2023-05-25 13:38:02,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:02,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +13: [2023-05-25 13:38:02,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +10: [2023-05-25 13:38:02,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:02,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:02,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:02,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. +29: [2023-05-25 13:38:02,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +29: [2023-05-25 13:38:02,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +30: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +11: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +28: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +11: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +28: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. +30: [2023-05-25 13:38:02,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +31: [2023-05-25 13:38:02,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +31: [2023-05-25 13:38:02,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +24: [2023-05-25 13:38:02,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +14: [2023-05-25 13:38:02,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +14: [2023-05-25 13:38:02,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +24: [2023-05-25 13:38:02,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +26: [2023-05-25 13:38:02,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +26: [2023-05-25 13:38:02,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +24: [2023-05-25 13:38:02,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +24: [2023-05-25 13:38:02,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +26: [2023-05-25 13:38:02,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +26: [2023-05-25 13:38:02,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +11: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +11: [2023-05-25 13:38:02,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +29: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +14: [2023-05-25 13:38:02,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +28: [2023-05-25 13:38:02,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:02,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +31: [2023-05-25 13:38:02,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +14: [2023-05-25 13:38:02,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +29: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +24: [2023-05-25 13:38:02,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +28: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +26: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +24: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +28: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +26: [2023-05-25 13:38:02,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +30: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +10: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +30: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +27: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +10: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +10: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +24: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. + 9: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 9: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +27: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +12: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +15: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +12: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +27: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +11: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +27: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +25: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +27: [2023-05-25 13:38:02,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +27: [2023-05-25 13:38:02,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +11: [2023-05-25 13:38:02,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +11: [2023-05-25 13:38:02,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:02,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +11: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +27: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +27: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +14: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +14: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +25: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +28: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +30: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +30: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +30: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +14: [2023-05-25 13:38:02,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +14: [2023-05-25 13:38:02,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:02,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +13: [2023-05-25 13:38:02,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... +24: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +30: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +15: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +25: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:02,383] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +13: [2023-05-25 13:38:02,383] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +15: [2023-05-25 13:38:02,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +27: [2023-05-25 13:38:02,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +27: [2023-05-25 13:38:02,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +28: [2023-05-25 13:38:02,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +28: [2023-05-25 13:38:02,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +25: [2023-05-25 13:38:02,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +30: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +30: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +25: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +31: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +31: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +25: [2023-05-25 13:38:02,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +27: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... +27: [2023-05-25 13:38:02,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +23: [2023-05-25 13:38:02,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +23: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +27: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +30: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +29: [2023-05-25 13:38:02,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +15: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +30: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +31: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +15: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +31: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +29: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +24: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +29: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +24: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +29: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +31: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +29: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... + 6: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +26: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +29: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +27: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +28: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +26: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +31: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +26: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +12: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +31: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +28: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +24: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... +28: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +28: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... +26: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +12: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +27: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +12: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +26: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +26: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +26: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +28: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +26: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +28: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +23: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +12: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +23: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +24: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +27: [2023-05-25 13:38:02,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +30: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... +28: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 2: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 2: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 2: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... +30: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... +25: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +15: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +28: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +19: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +25: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +30: [2023-05-25 13:38:02,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +15: [2023-05-25 13:38:02,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +19: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +25: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +30: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +30: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +27: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... +11: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +11: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... +30: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +12: [2023-05-25 13:38:02,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +20: [2023-05-25 13:38:02,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +30: [2023-05-25 13:38:02,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +20: [2023-05-25 13:38:02,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +12: [2023-05-25 13:38:02,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +25: [2023-05-25 13:38:02,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +12: [2023-05-25 13:38:02,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +31: [2023-05-25 13:38:02,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +19: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +27: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +25: [2023-05-25 13:38:02,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +27: [2023-05-25 13:38:02,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +15: [2023-05-25 13:38:02,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:02,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +18: [2023-05-25 13:38:02,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +31: [2023-05-25 13:38:02,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +15: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +19: [2023-05-25 13:38:02,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +29: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +11: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +27: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +28: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +16: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +16: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +25: [2023-05-25 13:38:02,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +11: [2023-05-25 13:38:02,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +28: [2023-05-25 13:38:02,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +30: [2023-05-25 13:38:02,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +20: [2023-05-25 13:38:02,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +31: [2023-05-25 13:38:02,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +25: [2023-05-25 13:38:02,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +15: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +26: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +15: [2023-05-25 13:38:02,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +24: [2023-05-25 13:38:02,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +31: [2023-05-25 13:38:02,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +28: [2023-05-25 13:38:02,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +22: [2023-05-25 13:38:02,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +22: [2023-05-25 13:38:02,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +29: [2023-05-25 13:38:02,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +21: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +14: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +14: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +28: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +21: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +28: [2023-05-25 13:38:02,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +16: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +18: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +30: [2023-05-25 13:38:02,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:02,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:02,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +15: [2023-05-25 13:38:02,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +31: [2023-05-25 13:38:02,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +22: [2023-05-25 13:38:02,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +15: [2023-05-25 13:38:02,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +24: [2023-05-25 13:38:02,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:02,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +22: [2023-05-25 13:38:02,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +26: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +23: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +26: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +23: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +22: [2023-05-25 13:38:02,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +22: [2023-05-25 13:38:02,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +12: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +19: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +19: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +28: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +17: [2023-05-25 13:38:02,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +11: [2023-05-25 13:38:02,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:02,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +14: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:02,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +11: [2023-05-25 13:38:02,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:02,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +19: [2023-05-25 13:38:02,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +19: [2023-05-25 13:38:02,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:02,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +17: [2023-05-25 13:38:02,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +19: [2023-05-25 13:38:02,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +19: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +23: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +15: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +23: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:02,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +22: [2023-05-25 13:38:02,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:02,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +22: [2023-05-25 13:38:02,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:02,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +17: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +23: [2023-05-25 13:38:02,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +19: [2023-05-25 13:38:02,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +19: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +22: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +23: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +17: [2023-05-25 13:38:02,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +19: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +23: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +18: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +17: [2023-05-25 13:38:02,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:02,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +15: [2023-05-25 13:38:02,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:02,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +16: [2023-05-25 13:38:02,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +16: [2023-05-25 13:38:02,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +13: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +16: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +10: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +18: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:02,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +16: [2023-05-25 13:38:02,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +16: [2023-05-25 13:38:02,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +10: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +15: [2023-05-25 13:38:02,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +10: [2023-05-25 13:38:02,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +10: [2023-05-25 13:38:02,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +15: [2023-05-25 13:38:02,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +15: [2023-05-25 13:38:02,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +18: [2023-05-25 13:38:02,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +22: [2023-05-25 13:38:02,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +11: [2023-05-25 13:38:02,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +22: [2023-05-25 13:38:02,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +22: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +22: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +22: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +14: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +20: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +19: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +14: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +19: [2023-05-25 13:38:02,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +14: [2023-05-25 13:38:02,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +23: [2023-05-25 13:38:02,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +22: [2023-05-25 13:38:02,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +23: [2023-05-25 13:38:02,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +19: [2023-05-25 13:38:02,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:02,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +21: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +22: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +10: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +10: [2023-05-25 13:38:02,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +21: [2023-05-25 13:38:02,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +22: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +11: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +12: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +19: [2023-05-25 13:38:02,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:02,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +23: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +22: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +11: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +17: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +19: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +14: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +14: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +18: [2023-05-25 13:38:02,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +19: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +19: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +18: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +23: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:02,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +22: [2023-05-25 13:38:02,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +10: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +13: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +22: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +12: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +12: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +16: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +13: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +16: [2023-05-25 13:38:02,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +13: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +16: [2023-05-25 13:38:02,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +17: [2023-05-25 13:38:02,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +17: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +18: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +23: [2023-05-25 13:38:02,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +23: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +21: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +10: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +17: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +10: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +23: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +17: [2023-05-25 13:38:02,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:02,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +22: [2023-05-25 13:38:02,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +23: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +23: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +21: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +22: [2023-05-25 13:38:02,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +16: [2023-05-25 13:38:02,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +20: [2023-05-25 13:38:02,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +20: [2023-05-25 13:38:02,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:02,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +16: [2023-05-25 13:38:02,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:02,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +13: [2023-05-25 13:38:02,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +17: [2023-05-25 13:38:02,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +18: [2023-05-25 13:38:02,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:02,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +10: [2023-05-25 13:38:02,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +20: [2023-05-25 13:38:02,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +20: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +20: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:02,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +22: [2023-05-25 13:38:02,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +10: [2023-05-25 13:38:02,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +10: [2023-05-25 13:38:02,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +12: [2023-05-25 13:38:02,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +12: [2023-05-25 13:38:02,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +20: [2023-05-25 13:38:02,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +10: [2023-05-25 13:38:02,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +20: [2023-05-25 13:38:02,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:02,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +11: [2023-05-25 13:38:02,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +10: [2023-05-25 13:38:02,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +17: [2023-05-25 13:38:02,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +20: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:02,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +14: [2023-05-25 13:38:02,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +17: [2023-05-25 13:38:02,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +10: [2023-05-25 13:38:02,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:02,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +17: [2023-05-25 13:38:02,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +10: [2023-05-25 13:38:02,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +10: [2023-05-25 13:38:02,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +10: [2023-05-25 13:38:02,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +15: [2023-05-25 13:38:02,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +18: [2023-05-25 13:38:02,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +15: [2023-05-25 13:38:02,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +13: [2023-05-25 13:38:02,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +16: [2023-05-25 13:38:02,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +11: [2023-05-25 13:38:02,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +12: [2023-05-25 13:38:02,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +12: [2023-05-25 13:38:02,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +16: [2023-05-25 13:38:02,573] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +16: [2023-05-25 13:38:02,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +13: [2023-05-25 13:38:02,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +16: [2023-05-25 13:38:02,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +20: [2023-05-25 13:38:02,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +13: [2023-05-25 13:38:02,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +18: [2023-05-25 13:38:02,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +15: [2023-05-25 13:38:02,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +15: [2023-05-25 13:38:02,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +12: [2023-05-25 13:38:02,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +18: [2023-05-25 13:38:02,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +13: [2023-05-25 13:38:02,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +20: [2023-05-25 13:38:02,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +12: [2023-05-25 13:38:02,588] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +20: [2023-05-25 13:38:02,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +20: [2023-05-25 13:38:02,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +14: [2023-05-25 13:38:02,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +14: [2023-05-25 13:38:02,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +20: [2023-05-25 13:38:02,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... + 6: [2023-05-25 13:38:02,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 6: [2023-05-25 13:38:02,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. +14: [2023-05-25 13:38:02,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +18: [2023-05-25 13:38:02,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,596] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +11: [2023-05-25 13:38:02,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:02,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +13: [2023-05-25 13:38:02,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:02,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:02,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +10: [2023-05-25 13:38:02,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +10: [2023-05-25 13:38:02,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:02,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +11: [2023-05-25 13:38:02,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +12: [2023-05-25 13:38:02,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:02,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +27: [2023-05-25 13:38:02,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +27: [2023-05-25 13:38:02,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +15: [2023-05-25 13:38:02,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +12: [2023-05-25 13:38:02,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +13: [2023-05-25 13:38:02,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +13: [2023-05-25 13:38:02,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +27: [2023-05-25 13:38:02,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 6: [2023-05-25 13:38:02,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +27: [2023-05-25 13:38:02,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +13: [2023-05-25 13:38:02,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:02,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +24: [2023-05-25 13:38:02,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +24: [2023-05-25 13:38:02,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +13: [2023-05-25 13:38:02,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. +13: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. +26: [2023-05-25 13:38:02,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +26: [2023-05-25 13:38:02,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +24: [2023-05-25 13:38:02,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +24: [2023-05-25 13:38:02,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. +24: [2023-05-25 13:38:02,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +24: [2023-05-25 13:38:02,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. +29: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +29: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +27: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +27: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +26: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +26: [2023-05-25 13:38:02,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 6: [2023-05-25 13:38:02,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +24: [2023-05-25 13:38:02,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:02,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. +13: [2023-05-25 13:38:02,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 7: [2023-05-25 13:38:02,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +13: [2023-05-25 13:38:02,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. +26: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +26: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +25: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +25: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +27: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +13: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +28: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +28: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +13: [2023-05-25 13:38:02,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... + 6: [2023-05-25 13:38:02,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +25: [2023-05-25 13:38:02,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +25: [2023-05-25 13:38:02,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +29: [2023-05-25 13:38:02,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +26: [2023-05-25 13:38:02,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +29: [2023-05-25 13:38:02,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +26: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +31: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +31: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +25: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +25: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. +26: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. +31: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +26: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +31: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. +29: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 6: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +25: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +29: [2023-05-25 13:38:02,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:02,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +30: [2023-05-25 13:38:02,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +30: [2023-05-25 13:38:02,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +30: [2023-05-25 13:38:02,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +28: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +28: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +26: [2023-05-25 13:38:02,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +28: [2023-05-25 13:38:02,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +28: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +26: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +31: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +25: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +31: [2023-05-25 13:38:02,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +25: [2023-05-25 13:38:02,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +30: [2023-05-25 13:38:02,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +24: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +31: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +30: [2023-05-25 13:38:02,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +25: [2023-05-25 13:38:02,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +24: [2023-05-25 13:38:02,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +21: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +31: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +21: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +30: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +30: [2023-05-25 13:38:02,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +23: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +23: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +29: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +28: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +29: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +29: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +29: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +24: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +29: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +28: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +18: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +18: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +21: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +30: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +15: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +21: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +15: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +14: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +14: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +23: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +29: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +12: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +12: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +23: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +29: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +29: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +10: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +10: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +29: [2023-05-25 13:38:02,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +10: [2023-05-25 13:38:02,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +29: [2023-05-25 13:38:02,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +10: [2023-05-25 13:38:02,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +22: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +22: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +19: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +19: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +18: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +14: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +14: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +18: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +24: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +24: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +28: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +24: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +24: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +15: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +28: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +24: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +12: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +12: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +15: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +25: [2023-05-25 13:38:02,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:02,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +11: [2023-05-25 13:38:02,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 2: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +31: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +25: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +20: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +31: [2023-05-25 13:38:02,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +31: [2023-05-25 13:38:02,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +20: [2023-05-25 13:38:02,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +31: [2023-05-25 13:38:02,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +24: [2023-05-25 13:38:02,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +11: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +31: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +31: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +10: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +11: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +24: [2023-05-25 13:38:02,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +10: [2023-05-25 13:38:02,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +27: [2023-05-25 13:38:02,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +27: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +10: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +27: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +22: [2023-05-25 13:38:02,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +27: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +27: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +27: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +10: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +27: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +27: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +28: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +28: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +30: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +26: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +28: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +19: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +26: [2023-05-25 13:38:02,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +26: [2023-05-25 13:38:02,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +19: [2023-05-25 13:38:02,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +28: [2023-05-25 13:38:02,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +30: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +30: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +30: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +26: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +26: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +26: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +20: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +28: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +26: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +26: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +17: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +17: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +20: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +14: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +28: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +14: [2023-05-25 13:38:02,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +28: [2023-05-25 13:38:02,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:02,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +28: [2023-05-25 13:38:02,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +11: [2023-05-25 13:38:02,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +28: [2023-05-25 13:38:02,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +11: [2023-05-25 13:38:02,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +11: [2023-05-25 13:38:02,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +29: [2023-05-25 13:38:02,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:02,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +21: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +12: [2023-05-25 13:38:02,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +21: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +12: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +14: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +15: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +29: [2023-05-25 13:38:02,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +12: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +25: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +14: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +14: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +27: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +15: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +17: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +31: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +27: [2023-05-25 13:38:02,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +14: [2023-05-25 13:38:02,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +12: [2023-05-25 13:38:02,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +28: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +15: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +29: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +15: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +29: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +14: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +25: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +17: [2023-05-25 13:38:02,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +31: [2023-05-25 13:38:02,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +10: [2023-05-25 13:38:02,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +21: [2023-05-25 13:38:02,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +15: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +21: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +15: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +15: [2023-05-25 13:38:02,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +31: [2023-05-25 13:38:02,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +25: [2023-05-25 13:38:02,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:02,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:02,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +30: [2023-05-25 13:38:02,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +11: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +10: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +26: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:02,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +10: [2023-05-25 13:38:02,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +28: [2023-05-25 13:38:02,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:02,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:02,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +28: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +23: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +27: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:02,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +23: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +23: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +23: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +13: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +23: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:02,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:02,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +28: [2023-05-25 13:38:02,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +28: [2023-05-25 13:38:02,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +26: [2023-05-25 13:38:02,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +19: [2023-05-25 13:38:02,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +19: [2023-05-25 13:38:02,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +20: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +30: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +20: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +30: [2023-05-25 13:38:02,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +20: [2023-05-25 13:38:02,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +20: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +20: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +20: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +26: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +21: [2023-05-25 13:38:02,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:02,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +21: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +21: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +27: [2023-05-25 13:38:02,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +28: [2023-05-25 13:38:02,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +15: [2023-05-25 13:38:02,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +13: [2023-05-25 13:38:02,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +13: [2023-05-25 13:38:02,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +26: [2023-05-25 13:38:02,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:02,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +21: [2023-05-25 13:38:02,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +27: [2023-05-25 13:38:02,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +27: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +22: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +21: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +22: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +19: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +19: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +11: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +27: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +11: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +11: [2023-05-25 13:38:02,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +16: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +15: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +16: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. + 9: [2023-05-25 13:38:02,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +11: [2023-05-25 13:38:02,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +23: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +11: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +11: [2023-05-25 13:38:02,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +23: [2023-05-25 13:38:02,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +11: [2023-05-25 13:38:02,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:02,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +10: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +10: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +15: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +14: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +16: [2023-05-25 13:38:02,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +14: [2023-05-25 13:38:02,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +20: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +14: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +23: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +14: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +14: [2023-05-25 13:38:02,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +12: [2023-05-25 13:38:02,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +21: [2023-05-25 13:38:02,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:02,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +16: [2023-05-25 13:38:02,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:02,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:02,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +20: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:02,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +22: [2023-05-25 13:38:02,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +21: [2023-05-25 13:38:02,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +13: [2023-05-25 13:38:02,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +17: [2023-05-25 13:38:02,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +13: [2023-05-25 13:38:02,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +16: [2023-05-25 13:38:02,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:02,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:02,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:02,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +20: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +13: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +13: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +16: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +13: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +16: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +16: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +16: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +19: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +21: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +19: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +20: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +19: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +19: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +12: [2023-05-25 13:38:02,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +19: [2023-05-25 13:38:02,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +19: [2023-05-25 13:38:02,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +19: [2023-05-25 13:38:02,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +11: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +22: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +23: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +23: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +11: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +22: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +22: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +17: [2023-05-25 13:38:02,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +12: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +17: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:02,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +14: [2023-05-25 13:38:02,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +16: [2023-05-25 13:38:02,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +13: [2023-05-25 13:38:02,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +17: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +17: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +17: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +13: [2023-05-25 13:38:02,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +20: [2023-05-25 13:38:02,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:02,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +12: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +23: [2023-05-25 13:38:02,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:02,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +22: [2023-05-25 13:38:02,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +12: [2023-05-25 13:38:02,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +20: [2023-05-25 13:38:02,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +18: [2023-05-25 13:38:02,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +14: [2023-05-25 13:38:02,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:02,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +18: [2023-05-25 13:38:02,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +11: [2023-05-25 13:38:02,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +19: [2023-05-25 13:38:02,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:02,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +14: [2023-05-25 13:38:02,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +10: [2023-05-25 13:38:02,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +18: [2023-05-25 13:38:02,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +19: [2023-05-25 13:38:02,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:02,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +13: [2023-05-25 13:38:02,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +17: [2023-05-25 13:38:02,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +14: [2023-05-25 13:38:02,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +19: [2023-05-25 13:38:02,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +16: [2023-05-25 13:38:02,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +16: [2023-05-25 13:38:02,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:02,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +23: [2023-05-25 13:38:02,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +23: [2023-05-25 13:38:02,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... + 6: [2023-05-25 13:38:02,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +14: [2023-05-25 13:38:02,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +13: [2023-05-25 13:38:02,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +13: [2023-05-25 13:38:02,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +17: [2023-05-25 13:38:02,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +13: [2023-05-25 13:38:02,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +10: [2023-05-25 13:38:02,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +23: [2023-05-25 13:38:02,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:02,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:02,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +13: [2023-05-25 13:38:02,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +23: [2023-05-25 13:38:02,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +22: [2023-05-25 13:38:02,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +13: [2023-05-25 13:38:02,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +17: [2023-05-25 13:38:02,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +19: [2023-05-25 13:38:02,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:02,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +18: [2023-05-25 13:38:02,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:02,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:02,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:02,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +21: [2023-05-25 13:38:02,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +21: [2023-05-25 13:38:02,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +14: [2023-05-25 13:38:02,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +22: [2023-05-25 13:38:02,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +22: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +14: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +17: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +17: [2023-05-25 13:38:02,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. +15: [2023-05-25 13:38:02,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +15: [2023-05-25 13:38:02,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +18: [2023-05-25 13:38:02,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +18: [2023-05-25 13:38:02,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +10: [2023-05-25 13:38:02,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +14: [2023-05-25 13:38:02,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +21: [2023-05-25 13:38:02,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +14: [2023-05-25 13:38:02,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +10: [2023-05-25 13:38:02,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +18: [2023-05-25 13:38:02,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +18: [2023-05-25 13:38:02,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +21: [2023-05-25 13:38:02,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +10: [2023-05-25 13:38:02,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +22: [2023-05-25 13:38:02,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +15: [2023-05-25 13:38:02,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +18: [2023-05-25 13:38:02,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +15: [2023-05-25 13:38:02,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +18: [2023-05-25 13:38:02,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +18: [2023-05-25 13:38:02,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +12: [2023-05-25 13:38:02,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +12: [2023-05-25 13:38:02,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +18: [2023-05-25 13:38:02,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. +12: [2023-05-25 13:38:02,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +12: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +22: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +15: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +21: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:02,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +11: [2023-05-25 13:38:02,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:02,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +15: [2023-05-25 13:38:02,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +21: [2023-05-25 13:38:02,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +21: [2023-05-25 13:38:02,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +22: [2023-05-25 13:38:02,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +15: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. +22: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +15: [2023-05-25 13:38:02,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +22: [2023-05-25 13:38:02,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:02,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +31: [2023-05-25 13:38:02,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +19: [2023-05-25 13:38:02,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +19: [2023-05-25 13:38:02,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +17: [2023-05-25 13:38:02,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +17: [2023-05-25 13:38:02,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +16: [2023-05-25 13:38:02,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:02,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:02,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:02,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +23: [2023-05-25 13:38:02,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +23: [2023-05-25 13:38:02,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:02,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +20: [2023-05-25 13:38:02,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +18: [2023-05-25 13:38:02,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +12: [2023-05-25 13:38:02,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +18: [2023-05-25 13:38:02,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +31: [2023-05-25 13:38:02,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:02,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +12: [2023-05-25 13:38:02,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +18: [2023-05-25 13:38:02,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +19: [2023-05-25 13:38:02,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +19: [2023-05-25 13:38:02,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. +16: [2023-05-25 13:38:02,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +23: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +23: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +17: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +12: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +16: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +12: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +20: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +20: [2023-05-25 13:38:02,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +11: [2023-05-25 13:38:02,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +11: [2023-05-25 13:38:03,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:03,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +11: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 2: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +19: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:03,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +26: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +23: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +16: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +26: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:03,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +20: [2023-05-25 13:38:03,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +23: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +23: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +17: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +16: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +17: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +20: [2023-05-25 13:38:03,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +20: [2023-05-25 13:38:03,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 4: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +13: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +13: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:03,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +14: [2023-05-25 13:38:03,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +14: [2023-05-25 13:38:03,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +26: [2023-05-25 13:38:03,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +28: [2023-05-25 13:38:03,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +28: [2023-05-25 13:38:03,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +26: [2023-05-25 13:38:03,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +24: [2023-05-25 13:38:03,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +24: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +29: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +29: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +13: [2023-05-25 13:38:03,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +13: [2023-05-25 13:38:03,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +14: [2023-05-25 13:38:03,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:03,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +30: [2023-05-25 13:38:03,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +30: [2023-05-25 13:38:03,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +11: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +28: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +11: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +10: [2023-05-25 13:38:03,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +29: [2023-05-25 13:38:03,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +10: [2023-05-25 13:38:03,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +24: [2023-05-25 13:38:03,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +29: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +26: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +26: [2023-05-25 13:38:03,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +26: [2023-05-25 13:38:03,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:03,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +25: [2023-05-25 13:38:03,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +24: [2023-05-25 13:38:03,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +25: [2023-05-25 13:38:03,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +10: [2023-05-25 13:38:03,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +30: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +31: [2023-05-25 13:38:03,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:03,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +31: [2023-05-25 13:38:03,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +11: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +12: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +28: [2023-05-25 13:38:03,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +28: [2023-05-25 13:38:03,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +26: [2023-05-25 13:38:03,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +15: [2023-05-25 13:38:03,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +15: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +24: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +24: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +13: [2023-05-25 13:38:03,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:03,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +25: [2023-05-25 13:38:03,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:03,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +26: [2023-05-25 13:38:03,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +26: [2023-05-25 13:38:03,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +13: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +13: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... + 9: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +26: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +25: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +12: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +31: [2023-05-25 13:38:03,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +10: [2023-05-25 13:38:03,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +10: [2023-05-25 13:38:03,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +29: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +29: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +29: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +29: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +28: [2023-05-25 13:38:03,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:03,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +15: [2023-05-25 13:38:03,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:03,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +24: [2023-05-25 13:38:03,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +16: [2023-05-25 13:38:03,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +17: [2023-05-25 13:38:03,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +24: [2023-05-25 13:38:03,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +27: [2023-05-25 13:38:03,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +27: [2023-05-25 13:38:03,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 9: [2023-05-25 13:38:03,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +14: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +22: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +14: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +22: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +11: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +17: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +28: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +23: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +28: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +27: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +10: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +27: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +22: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +22: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +30: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +30: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +28: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +28: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +28: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +29: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +28: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +10: [2023-05-25 13:38:03,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +11: [2023-05-25 13:38:03,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +26: [2023-05-25 13:38:03,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +23: [2023-05-25 13:38:03,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +26: [2023-05-25 13:38:03,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +26: [2023-05-25 13:38:03,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +31: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +31: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +31: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +31: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +31: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +12: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +17: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +15: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +31: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +16: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +31: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +31: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +31: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +23: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +11: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +11: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +14: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +21: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +14: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +21: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +15: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +27: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +12: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +22: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +25: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +17: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +25: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +30: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 9: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +29: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +16: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +22: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +20: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +12: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +29: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +12: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +20: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +29: [2023-05-25 13:38:03,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:03,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:03,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:03,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +23: [2023-05-25 13:38:03,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:03,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +20: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +31: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +20: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +23: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +23: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +23: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +11: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +12: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +31: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +28: [2023-05-25 13:38:03,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:03,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +23: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +12: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +26: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +15: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +20: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +23: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +26: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +20: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +31: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +21: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +28: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +10: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +21: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +10: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +15: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +28: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +28: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +25: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +15: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +25: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +25: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +25: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +12: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +12: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +25: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +15: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +25: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +29: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +15: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... + 9: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +25: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +15: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +24: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +19: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +19: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +20: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +23: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +23: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +25: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +24: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +25: [2023-05-25 13:38:03,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +24: [2023-05-25 13:38:03,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +24: [2023-05-25 13:38:03,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:03,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +24: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +17: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +18: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +17: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +24: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +24: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +24: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +20: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +28: [2023-05-25 13:38:03,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +11: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +20: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +19: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +22: [2023-05-25 13:38:03,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +19: [2023-05-25 13:38:03,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +22: [2023-05-25 13:38:03,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +26: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +26: [2023-05-25 13:38:03,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +10: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +28: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +11: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +18: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +22: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +28: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +10: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +21: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +21: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +30: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +14: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +21: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +18: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +21: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +25: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +21: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:03,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +14: [2023-05-25 13:38:03,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +30: [2023-05-25 13:38:03,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +28: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +30: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +30: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +14: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... + 1: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +25: [2023-05-25 13:38:03,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +14: [2023-05-25 13:38:03,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +29: [2023-05-25 13:38:03,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:03,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +29: [2023-05-25 13:38:03,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +19: [2023-05-25 13:38:03,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +26: [2023-05-25 13:38:03,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +31: [2023-05-25 13:38:03,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +13: [2023-05-25 13:38:03,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +26: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +29: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +13: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +28: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +20: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +30: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +12: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +12: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +22: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +13: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +17: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +26: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +18: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +12: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +30: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +30: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +27: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +23: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +27: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +18: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +26: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +22: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +30: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +16: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +12: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +20: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +26: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +26: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +23: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +22: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +22: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +28: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +22: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +25: [2023-05-25 13:38:03,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +21: [2023-05-25 13:38:03,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +18: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... + 0: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +23: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +25: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +21: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +13: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +13: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +16: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +22: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +19: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +25: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +21: [2023-05-25 13:38:03,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 0: [2023-05-25 13:38:03,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +18: [2023-05-25 13:38:03,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +16: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +16: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +20: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +21: [2023-05-25 13:38:03,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:03,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +16: [2023-05-25 13:38:03,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +16: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +16: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +21: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +25: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +18: [2023-05-25 13:38:03,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +25: [2023-05-25 13:38:03,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +12: [2023-05-25 13:38:03,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +12: [2023-05-25 13:38:03,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +30: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +25: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +23: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +20: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +24: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +20: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +18: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +27: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +12: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +12: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +23: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +20: [2023-05-25 13:38:03,157] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +20: [2023-05-25 13:38:03,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +13: [2023-05-25 13:38:03,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +22: [2023-05-25 13:38:03,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +22: [2023-05-25 13:38:03,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +28: [2023-05-25 13:38:03,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +13: [2023-05-25 13:38:03,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +25: [2023-05-25 13:38:03,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +20: [2023-05-25 13:38:03,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +28: [2023-05-25 13:38:03,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +29: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +17: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +14: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +29: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +20: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +15: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +21: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +14: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +25: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +15: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +28: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +14: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +17: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +17: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +17: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +19: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +14: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +15: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +19: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +14: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +15: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +25: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +20: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +20: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +23: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +30: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +17: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +26: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +12: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +11: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +11: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +30: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +19: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +14: [2023-05-25 13:38:03,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +19: [2023-05-25 13:38:03,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +28: [2023-05-25 13:38:03,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +17: [2023-05-25 13:38:03,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +26: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +19: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +25: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +25: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +23: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +19: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +11: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +11: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +25: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +21: [2023-05-25 13:38:03,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +13: [2023-05-25 13:38:03,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +17: [2023-05-25 13:38:03,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +24: [2023-05-25 13:38:03,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +24: [2023-05-25 13:38:03,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +16: [2023-05-25 13:38:03,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +17: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +27: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +22: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +26: [2023-05-25 13:38:03,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +19: [2023-05-25 13:38:03,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:03,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +22: [2023-05-25 13:38:03,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +26: [2023-05-25 13:38:03,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +27: [2023-05-25 13:38:03,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:03,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +19: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +27: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +21: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +12: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +21: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +27: [2023-05-25 13:38:03,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +18: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +18: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +19: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +18: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +12: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +27: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +21: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +27: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +18: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +27: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +19: [2023-05-25 13:38:03,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +30: [2023-05-25 13:38:03,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +18: [2023-05-25 13:38:03,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +30: [2023-05-25 13:38:03,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +21: [2023-05-25 13:38:03,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +16: [2023-05-25 13:38:03,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:03,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +19: [2023-05-25 13:38:03,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +27: [2023-05-25 13:38:03,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +21: [2023-05-25 13:38:03,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +13: [2023-05-25 13:38:03,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +16: [2023-05-25 13:38:03,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +21: [2023-05-25 13:38:03,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +27: [2023-05-25 13:38:03,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +13: [2023-05-25 13:38:03,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +30: [2023-05-25 13:38:03,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +16: [2023-05-25 13:38:03,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +27: [2023-05-25 13:38:03,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +18: [2023-05-25 13:38:03,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +12: [2023-05-25 13:38:03,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:03,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +27: [2023-05-25 13:38:03,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:03,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +18: [2023-05-25 13:38:03,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +30: [2023-05-25 13:38:03,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +30: [2023-05-25 13:38:03,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +27: [2023-05-25 13:38:03,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +15: [2023-05-25 13:38:03,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +16: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +10: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:03,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +14: [2023-05-25 13:38:03,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:03,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +29: [2023-05-25 13:38:03,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +13: [2023-05-25 13:38:03,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +29: [2023-05-25 13:38:03,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +18: [2023-05-25 13:38:03,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +29: [2023-05-25 13:38:03,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +17: [2023-05-25 13:38:03,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +27: [2023-05-25 13:38:03,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +18: [2023-05-25 13:38:03,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +19: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +15: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +15: [2023-05-25 13:38:03,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +11: [2023-05-25 13:38:03,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:03,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +14: [2023-05-25 13:38:03,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +27: [2023-05-25 13:38:03,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +13: [2023-05-25 13:38:03,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +19: [2023-05-25 13:38:03,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:03,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +11: [2023-05-25 13:38:03,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:03,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +15: [2023-05-25 13:38:03,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 8: [2023-05-25 13:38:03,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... + 9: [2023-05-25 13:38:03,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +10: [2023-05-25 13:38:03,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +27: [2023-05-25 13:38:03,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +13: [2023-05-25 13:38:03,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:03,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:03,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +10: [2023-05-25 13:38:03,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +14: [2023-05-25 13:38:03,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:03,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +10: [2023-05-25 13:38:03,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +10: [2023-05-25 13:38:03,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +10: [2023-05-25 13:38:03,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +15: [2023-05-25 13:38:03,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:03,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:03,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +23: [2023-05-25 13:38:03,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +23: [2023-05-25 13:38:03,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +13: [2023-05-25 13:38:03,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:03,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +13: [2023-05-25 13:38:03,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +13: [2023-05-25 13:38:03,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +13: [2023-05-25 13:38:03,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +23: [2023-05-25 13:38:03,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +23: [2023-05-25 13:38:03,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:03,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 8: [2023-05-25 13:38:03,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +14: [2023-05-25 13:38:03,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:03,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:03,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +10: [2023-05-25 13:38:03,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:03,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +12: [2023-05-25 13:38:03,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +12: [2023-05-25 13:38:03,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:03,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +10: [2023-05-25 13:38:03,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +18: [2023-05-25 13:38:03,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +18: [2023-05-25 13:38:03,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +17: [2023-05-25 13:38:03,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +17: [2023-05-25 13:38:03,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +10: [2023-05-25 13:38:03,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +10: [2023-05-25 13:38:03,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +16: [2023-05-25 13:38:03,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +16: [2023-05-25 13:38:03,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +12: [2023-05-25 13:38:03,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +12: [2023-05-25 13:38:03,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +23: [2023-05-25 13:38:03,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +18: [2023-05-25 13:38:03,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +17: [2023-05-25 13:38:03,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:03,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +17: [2023-05-25 13:38:03,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +23: [2023-05-25 13:38:03,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +29: [2023-05-25 13:38:03,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +29: [2023-05-25 13:38:03,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +20: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +16: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +23: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +16: [2023-05-25 13:38:03,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:03,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +21: [2023-05-25 13:38:03,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +21: [2023-05-25 13:38:03,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +21: [2023-05-25 13:38:03,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +19: [2023-05-25 13:38:03,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +29: [2023-05-25 13:38:03,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +22: [2023-05-25 13:38:03,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +20: [2023-05-25 13:38:03,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +11: [2023-05-25 13:38:03,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +20: [2023-05-25 13:38:03,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +11: [2023-05-25 13:38:03,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:03,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:03,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +17: [2023-05-25 13:38:03,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +12: [2023-05-25 13:38:03,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +12: [2023-05-25 13:38:03,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +19: [2023-05-25 13:38:03,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +12: [2023-05-25 13:38:03,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +22: [2023-05-25 13:38:03,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +12: [2023-05-25 13:38:03,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +22: [2023-05-25 13:38:03,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +30: [2023-05-25 13:38:03,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +30: [2023-05-25 13:38:03,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:03,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +11: [2023-05-25 13:38:03,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:03,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +20: [2023-05-25 13:38:03,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +20: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +11: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +16: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... +16: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +31: [2023-05-25 13:38:03,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +31: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +24: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +24: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +28: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +16: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +28: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 2: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +30: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 2: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 6: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +20: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +30: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 5: [2023-05-25 13:38:03,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +22: [2023-05-25 13:38:03,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +31: [2023-05-25 13:38:03,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +26: [2023-05-25 13:38:03,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +24: [2023-05-25 13:38:03,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +22: [2023-05-25 13:38:03,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +19: [2023-05-25 13:38:03,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +26: [2023-05-25 13:38:03,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +28: [2023-05-25 13:38:03,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +31: [2023-05-25 13:38:03,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +24: [2023-05-25 13:38:03,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +16: [2023-05-25 13:38:03,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +16: [2023-05-25 13:38:03,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +19: [2023-05-25 13:38:03,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +28: [2023-05-25 13:38:03,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +31: [2023-05-25 13:38:03,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +19: [2023-05-25 13:38:03,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +22: [2023-05-25 13:38:03,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +28: [2023-05-25 13:38:03,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:03,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +19: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +28: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +22: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +28: [2023-05-25 13:38:03,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +14: [2023-05-25 13:38:03,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 5: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +11: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... + 5: [2023-05-25 13:38:03,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +26: [2023-05-25 13:38:03,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +11: [2023-05-25 13:38:03,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +10: [2023-05-25 13:38:03,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +10: [2023-05-25 13:38:03,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +31: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +16: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +16: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +26: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +11: [2023-05-25 13:38:03,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +23: [2023-05-25 13:38:03,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +23: [2023-05-25 13:38:03,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +11: [2023-05-25 13:38:03,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +12: [2023-05-25 13:38:03,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +13: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +12: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +13: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +14: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +10: [2023-05-25 13:38:03,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +28: [2023-05-25 13:38:03,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +10: [2023-05-25 13:38:03,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +24: [2023-05-25 13:38:03,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +24: [2023-05-25 13:38:03,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +28: [2023-05-25 13:38:03,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:03,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +23: [2023-05-25 13:38:03,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +18: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +18: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +28: [2023-05-25 13:38:03,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +11: [2023-05-25 13:38:03,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +11: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... +12: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +12: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... +26: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +26: [2023-05-25 13:38:03,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... + 9: [2023-05-25 13:38:03,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +13: [2023-05-25 13:38:03,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +13: [2023-05-25 13:38:03,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +24: [2023-05-25 13:38:03,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +24: [2023-05-25 13:38:03,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +28: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +28: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +28: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 9: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +17: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +29: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +29: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +15: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +29: [2023-05-25 13:38:03,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +29: [2023-05-25 13:38:03,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +15: [2023-05-25 13:38:03,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +28: [2023-05-25 13:38:03,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +29: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +29: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +25: [2023-05-25 13:38:03,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:03,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +18: [2023-05-25 13:38:03,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +29: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +28: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +28: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +11: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +27: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +17: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +27: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +25: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +18: [2023-05-25 13:38:03,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +10: [2023-05-25 13:38:03,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +26: [2023-05-25 13:38:03,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +11: [2023-05-25 13:38:03,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +10: [2023-05-25 13:38:03,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +27: [2023-05-25 13:38:03,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +26: [2023-05-25 13:38:03,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +28: [2023-05-25 13:38:03,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:03,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +14: [2023-05-25 13:38:03,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +30: [2023-05-25 13:38:03,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +30: [2023-05-25 13:38:03,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +28: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +14: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +31: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +10: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +31: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +19: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +19: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +14: [2023-05-25 13:38:03,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +10: [2023-05-25 13:38:03,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +31: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +19: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +19: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +31: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +28: [2023-05-25 13:38:03,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +31: [2023-05-25 13:38:03,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +31: [2023-05-25 13:38:03,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +31: [2023-05-25 13:38:03,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +31: [2023-05-25 13:38:03,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +27: [2023-05-25 13:38:03,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +12: [2023-05-25 13:38:03,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +12: [2023-05-25 13:38:03,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +27: [2023-05-25 13:38:03,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +24: [2023-05-25 13:38:03,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +12: [2023-05-25 13:38:03,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +12: [2023-05-25 13:38:03,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +24: [2023-05-25 13:38:03,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +27: [2023-05-25 13:38:03,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +11: [2023-05-25 13:38:03,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +29: [2023-05-25 13:38:03,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +29: [2023-05-25 13:38:03,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +11: [2023-05-25 13:38:03,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +13: [2023-05-25 13:38:03,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:03,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +30: [2023-05-25 13:38:03,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +30: [2023-05-25 13:38:03,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:03,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +25: [2023-05-25 13:38:03,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +15: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +13: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +13: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +15: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +25: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +25: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 9: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +14: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +31: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +31: [2023-05-25 13:38:03,432] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +29: [2023-05-25 13:38:03,432] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:03,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +21: [2023-05-25 13:38:03,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +21: [2023-05-25 13:38:03,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +20: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +18: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +12: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +18: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. + 9: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +12: [2023-05-25 13:38:03,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +19: [2023-05-25 13:38:03,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +26: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +26: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +29: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +23: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +23: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +24: [2023-05-25 13:38:03,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +24: [2023-05-25 13:38:03,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +15: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +25: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +22: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +22: [2023-05-25 13:38:03,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +28: [2023-05-25 13:38:03,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +15: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +10: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +15: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +25: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +21: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +11: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +15: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +14: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +16: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +25: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +16: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +19: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +21: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +19: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +16: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +25: [2023-05-25 13:38:03,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +11: [2023-05-25 13:38:03,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +16: [2023-05-25 13:38:03,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +16: [2023-05-25 13:38:03,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +12: [2023-05-25 13:38:03,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +25: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +21: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +25: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +16: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +18: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +16: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +29: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +12: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:03,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:03,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:03,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +18: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +26: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +26: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +26: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +26: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +26: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +26: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +26: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +23: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +26: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +26: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +22: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... + 0: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +26: [2023-05-25 13:38:03,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +28: [2023-05-25 13:38:03,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:03,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +10: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +12: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +22: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +17: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +27: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +23: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +27: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +10: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +23: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +28: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +12: [2023-05-25 13:38:03,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +23: [2023-05-25 13:38:03,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +21: [2023-05-25 13:38:03,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +27: [2023-05-25 13:38:03,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +12: [2023-05-25 13:38:03,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +23: [2023-05-25 13:38:03,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +25: [2023-05-25 13:38:03,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +23: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +16: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +16: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +19: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +31: [2023-05-25 13:38:03,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +21: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +25: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +20: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +25: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:03,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +20: [2023-05-25 13:38:03,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +29: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +29: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +27: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +29: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +31: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +31: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +28: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:03,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +27: [2023-05-25 13:38:03,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +17: [2023-05-25 13:38:03,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +28: [2023-05-25 13:38:03,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +28: [2023-05-25 13:38:03,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +19: [2023-05-25 13:38:03,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:03,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:03,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:03,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +29: [2023-05-25 13:38:03,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +21: [2023-05-25 13:38:03,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +21: [2023-05-25 13:38:03,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +19: [2023-05-25 13:38:03,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +31: [2023-05-25 13:38:03,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +19: [2023-05-25 13:38:03,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +25: [2023-05-25 13:38:03,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +19: [2023-05-25 13:38:03,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +31: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_12_optim_states.pt... +25: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_12_optim_states.pt... +31: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +31: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +31: [2023-05-25 13:38:03,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +19: [2023-05-25 13:38:03,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:03,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +29: [2023-05-25 13:38:03,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +29: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_12_optim_states.pt... +29: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_12_optim_states.pt... +20: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +29: [2023-05-25 13:38:03,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +20: [2023-05-25 13:38:03,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:03,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +28: [2023-05-25 13:38:03,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_12_optim_states.pt... +28: [2023-05-25 13:38:03,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_12_optim_states.pt... + 2: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +13: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +31: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_12_optim_states.pt... +13: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +25: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +16: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +31: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_12_optim_states.pt... + 8: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +25: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +21: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +30: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +16: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +25: [2023-05-25 13:38:03,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +22: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +25: [2023-05-25 13:38:03,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +22: [2023-05-25 13:38:03,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +28: [2023-05-25 13:38:03,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:03,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:03,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +17: [2023-05-25 13:38:03,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:03,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +24: [2023-05-25 13:38:03,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +22: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +23: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +24: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +31: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +12: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +21: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +24: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +24: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +24: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +22: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +24: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +12: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +24: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +22: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +27: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +23: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +23: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +24: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +30: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +12: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +30: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:03,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:03,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +23: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +22: [2023-05-25 13:38:03,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 8: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +22: [2023-05-25 13:38:03,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +16: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +16: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +19: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +19: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +27: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +25: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +12: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +12: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +21: [2023-05-25 13:38:03,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +16: [2023-05-25 13:38:03,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +20: [2023-05-25 13:38:03,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +22: [2023-05-25 13:38:03,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +20: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +25: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +22: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +19: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +12: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +21: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +22: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +25: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +20: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +16: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:03,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +21: [2023-05-25 13:38:03,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +17: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +25: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +18: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +18: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +18: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +20: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +18: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +18: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +12: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +27: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +26: [2023-05-25 13:38:03,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +11: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +13: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +11: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +13: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +17: [2023-05-25 13:38:03,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +26: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +27: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +26: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +23: [2023-05-25 13:38:03,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +28: [2023-05-25 13:38:03,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +21: [2023-05-25 13:38:03,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +27: [2023-05-25 13:38:03,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +10: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +20: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +28: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +27: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +10: [2023-05-25 13:38:03,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +10: [2023-05-25 13:38:03,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +10: [2023-05-25 13:38:03,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +20: [2023-05-25 13:38:03,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +10: [2023-05-25 13:38:03,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +11: [2023-05-25 13:38:03,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +20: [2023-05-25 13:38:03,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +11: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +27: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +27: [2023-05-25 13:38:03,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +11: [2023-05-25 13:38:03,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +15: [2023-05-25 13:38:03,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +15: [2023-05-25 13:38:03,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +21: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +11: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +22: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +15: [2023-05-25 13:38:03,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +15: [2023-05-25 13:38:03,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +11: [2023-05-25 13:38:03,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +14: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +27: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +13: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +24: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +14: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +26: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +26: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +13: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +13: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +27: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:03,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +30: [2023-05-25 13:38:03,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +18: [2023-05-25 13:38:03,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_12_optim_states.pt... +20: [2023-05-25 13:38:03,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:03,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:03,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:03,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +26: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_12_optim_states.pt... +27: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_12_optim_states.pt... +27: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_12_optim_states.pt... +28: [2023-05-25 13:38:03,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +20: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +30: [2023-05-25 13:38:03,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +22: [2023-05-25 13:38:03,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +18: [2023-05-25 13:38:03,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +18: [2023-05-25 13:38:03,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +13: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... + 9: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +20: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +24: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +13: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +24: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +22: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +10: [2023-05-25 13:38:03,536] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +22: [2023-05-25 13:38:03,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:03,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +22: [2023-05-25 13:38:03,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +30: [2023-05-25 13:38:03,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +20: [2023-05-25 13:38:03,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +24: [2023-05-25 13:38:03,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +22: [2023-05-25 13:38:03,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +30: [2023-05-25 13:38:03,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +24: [2023-05-25 13:38:03,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +22: [2023-05-25 13:38:03,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +30: [2023-05-25 13:38:03,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +27: [2023-05-25 13:38:03,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +18: [2023-05-25 13:38:03,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +27: [2023-05-25 13:38:03,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +10: [2023-05-25 13:38:03,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +24: [2023-05-25 13:38:03,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +10: [2023-05-25 13:38:03,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +13: [2023-05-25 13:38:03,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +11: [2023-05-25 13:38:03,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +30: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +11: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +30: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +13: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +13: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +15: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +15: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +27: [2023-05-25 13:38:03,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +24: [2023-05-25 13:38:03,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_12_optim_states.pt... +24: [2023-05-25 13:38:03,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_12_optim_states.pt... +13: [2023-05-25 13:38:03,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +30: [2023-05-25 13:38:03,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_12_optim_states.pt... +30: [2023-05-25 13:38:03,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_12_optim_states.pt... +15: [2023-05-25 13:38:03,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:03,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +14: [2023-05-25 13:38:03,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:03,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +11: [2023-05-25 13:38:03,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,572] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,572] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +14: [2023-05-25 13:38:03,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +12: [2023-05-25 13:38:03,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:03,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +14: [2023-05-25 13:38:03,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +12: [2023-05-25 13:38:03,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +15: [2023-05-25 13:38:03,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +12: [2023-05-25 13:38:03,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +29: [2023-05-25 13:38:03,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +29: [2023-05-25 13:38:03,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 8: [2023-05-25 13:38:03,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +11: [2023-05-25 13:38:03,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +13: [2023-05-25 13:38:03,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +13: [2023-05-25 13:38:03,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +29: [2023-05-25 13:38:03,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +22: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +17: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +22: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +17: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +14: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +11: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +23: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +23: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +14: [2023-05-25 13:38:03,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +19: [2023-05-25 13:38:03,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +12: [2023-05-25 13:38:03,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +19: [2023-05-25 13:38:03,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +20: [2023-05-25 13:38:03,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +20: [2023-05-25 13:38:03,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +14: [2023-05-25 13:38:03,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +13: [2023-05-25 13:38:03,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:03,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +11: [2023-05-25 13:38:03,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +12: [2023-05-25 13:38:03,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +10: [2023-05-25 13:38:03,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +10: [2023-05-25 13:38:03,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +16: [2023-05-25 13:38:03,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +16: [2023-05-25 13:38:03,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +12: [2023-05-25 13:38:03,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +18: [2023-05-25 13:38:03,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +18: [2023-05-25 13:38:03,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +12: [2023-05-25 13:38:03,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 4: [2023-05-25 13:38:03,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +21: [2023-05-25 13:38:03,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +21: [2023-05-25 13:38:03,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +23: [2023-05-25 13:38:03,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +22: [2023-05-25 13:38:03,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:03,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:03,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +10: [2023-05-25 13:38:03,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +29: [2023-05-25 13:38:03,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +10: [2023-05-25 13:38:03,639] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. +16: [2023-05-25 13:38:03,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +22: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +16: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +29: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +18: [2023-05-25 13:38:03,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +18: [2023-05-25 13:38:03,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +21: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +21: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +11: [2023-05-25 13:38:03,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +13: [2023-05-25 13:38:03,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +11: [2023-05-25 13:38:03,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +10: [2023-05-25 13:38:03,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +13: [2023-05-25 13:38:03,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +13: [2023-05-25 13:38:03,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 6: [2023-05-25 13:38:03,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. +13: [2023-05-25 13:38:03,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +29: [2023-05-25 13:38:03,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +29: [2023-05-25 13:38:03,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... + 1: [2023-05-25 13:38:03,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +11: [2023-05-25 13:38:03,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +11: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. +23: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +17: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +23: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +20: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +19: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +16: [2023-05-25 13:38:03,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +10: [2023-05-25 13:38:03,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +19: [2023-05-25 13:38:03,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +19: [2023-05-25 13:38:03,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +22: [2023-05-25 13:38:03,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +20: [2023-05-25 13:38:03,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +19: [2023-05-25 13:38:03,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +10: [2023-05-25 13:38:03,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +16: [2023-05-25 13:38:03,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:03,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +16: [2023-05-25 13:38:03,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +18: [2023-05-25 13:38:03,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. +21: [2023-05-25 13:38:03,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:03,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:03,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +22: [2023-05-25 13:38:03,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +18: [2023-05-25 13:38:03,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 4: [2023-05-25 13:38:03,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +21: [2023-05-25 13:38:03,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +18: [2023-05-25 13:38:03,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +31: [2023-05-25 13:38:03,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +31: [2023-05-25 13:38:03,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +21: [2023-05-25 13:38:03,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +18: [2023-05-25 13:38:03,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 4: [2023-05-25 13:38:03,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +31: [2023-05-25 13:38:03,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:03,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +31: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +31: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +31: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +27: [2023-05-25 13:38:03,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +27: [2023-05-25 13:38:03,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +19: [2023-05-25 13:38:03,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +19: [2023-05-25 13:38:03,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +19: [2023-05-25 13:38:03,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +28: [2023-05-25 13:38:03,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +19: [2023-05-25 13:38:03,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +28: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +31: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +31: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +31: [2023-05-25 13:38:03,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +31: [2023-05-25 13:38:03,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +27: [2023-05-25 13:38:03,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:03,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +27: [2023-05-25 13:38:03,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +27: [2023-05-25 13:38:03,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +28: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +26: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +31: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +26: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +28: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +31: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +31: [2023-05-25 13:38:03,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +31: [2023-05-25 13:38:03,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +28: [2023-05-25 13:38:03,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +25: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +25: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +29: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +29: [2023-05-25 13:38:03,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +28: [2023-05-25 13:38:03,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +26: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +26: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +26: [2023-05-25 13:38:03,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +26: [2023-05-25 13:38:03,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +24: [2023-05-25 13:38:03,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +24: [2023-05-25 13:38:03,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +27: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +27: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +27: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +31: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +27: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +31: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +25: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +19: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +21: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +25: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_15_optim_states.pt... + 2: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +25: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_15_optim_states.pt... + 4: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +28: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_15_optim_states.pt... +28: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_15_optim_states.pt... + 5: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +25: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +24: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +26: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +24: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +26: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +30: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +29: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +29: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +25: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +25: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +26: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +29: [2023-05-25 13:38:03,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +19: [2023-05-25 13:38:03,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +25: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +25: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +29: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +26: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +25: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +26: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_13_optim_states.pt... +31: [2023-05-25 13:38:03,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_13_optim_states.pt... + 6: [2023-05-25 13:38:03,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +17: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +26: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +17: [2023-05-25 13:38:03,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +28: [2023-05-25 13:38:03,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +28: [2023-05-25 13:38:03,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +26: [2023-05-25 13:38:03,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +25: [2023-05-25 13:38:03,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +17: [2023-05-25 13:38:03,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +26: [2023-05-25 13:38:03,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +27: [2023-05-25 13:38:03,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +17: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +27: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +29: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +28: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +28: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +26: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +26: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +19: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +29: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +26: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +29: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +26: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +21: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +26: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +21: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +26: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +24: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +30: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +14: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +28: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +30: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +30: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +14: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +30: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +26: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +30: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +30: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +26: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +24: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +19: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +30: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +30: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +24: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +24: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +25: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +26: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +26: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +28: [2023-05-25 13:38:03,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +28: [2023-05-25 13:38:03,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +24: [2023-05-25 13:38:03,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +24: [2023-05-25 13:38:03,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +24: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +24: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +24: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +24: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 2: [2023-05-25 13:38:03,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +27: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +27: [2023-05-25 13:38:03,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +29: [2023-05-25 13:38:03,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_15_optim_states.pt... +29: [2023-05-25 13:38:03,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_15_optim_states.pt... + 0: [2023-05-25 13:38:03,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +27: [2023-05-25 13:38:03,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +25: [2023-05-25 13:38:03,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +27: [2023-05-25 13:38:03,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +30: [2023-05-25 13:38:03,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +30: [2023-05-25 13:38:03,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +25: [2023-05-25 13:38:03,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +14: [2023-05-25 13:38:03,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +12: [2023-05-25 13:38:03,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +12: [2023-05-25 13:38:03,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +31: [2023-05-25 13:38:03,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +31: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +24: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_14_optim_states.pt... +24: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_14_optim_states.pt... +12: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +25: [2023-05-25 13:38:03,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_14_optim_states.pt... +25: [2023-05-25 13:38:03,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_14_optim_states.pt... +12: [2023-05-25 13:38:03,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +26: [2023-05-25 13:38:03,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_14_optim_states.pt... + 0: [2023-05-25 13:38:03,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +26: [2023-05-25 13:38:03,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_14_optim_states.pt... +27: [2023-05-25 13:38:03,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +27: [2023-05-25 13:38:03,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +29: [2023-05-25 13:38:03,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +29: [2023-05-25 13:38:03,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +16: [2023-05-25 13:38:03,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +16: [2023-05-25 13:38:03,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +29: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +25: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_15_optim_states.pt... +25: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_15_optim_states.pt... +16: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +26: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_15_optim_states.pt... + 1: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +26: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_15_optim_states.pt... +29: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +30: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_15_optim_states.pt... +24: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_15_optim_states.pt... +22: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +30: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +30: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +16: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +28: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_14_optim_states.pt... +28: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_14_optim_states.pt... +23: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +19: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +19: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +14: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 1: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +14: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +22: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +23: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +31: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +16: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +23: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +31: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +31: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +17: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +17: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +31: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +13: [2023-05-25 13:38:03,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:03,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:03,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +22: [2023-05-25 13:38:03,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +20: [2023-05-25 13:38:03,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +31: [2023-05-25 13:38:03,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +31: [2023-05-25 13:38:03,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +20: [2023-05-25 13:38:03,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +30: [2023-05-25 13:38:03,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +30: [2023-05-25 13:38:03,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +27: [2023-05-25 13:38:03,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:03,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +16: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_15_optim_states.pt... +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_15_optim_states.pt... +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +16: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:03,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +23: [2023-05-25 13:38:03,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +23: [2023-05-25 13:38:03,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +27: [2023-05-25 13:38:03,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +14: [2023-05-25 13:38:03,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +27: [2023-05-25 13:38:03,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +22: [2023-05-25 13:38:03,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +22: [2023-05-25 13:38:03,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +19: [2023-05-25 13:38:03,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +17: [2023-05-25 13:38:03,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:03,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +30: [2023-05-25 13:38:03,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +20: [2023-05-25 13:38:03,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +30: [2023-05-25 13:38:03,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +20: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +12: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +12: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +20: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +14: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +27: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_13_optim_states.pt... +12: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +26: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_13_optim_states.pt... +26: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_13_optim_states.pt... +20: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +12: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +27: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_13_optim_states.pt... +12: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +20: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +11: [2023-05-25 13:38:03,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +20: [2023-05-25 13:38:03,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:03,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +18: [2023-05-25 13:38:03,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +18: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +18: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +18: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +18: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +29: [2023-05-25 13:38:03,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_13_optim_states.pt... +29: [2023-05-25 13:38:03,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_13_optim_states.pt... +30: [2023-05-25 13:38:03,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_13_optim_states.pt... +30: [2023-05-25 13:38:03,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_13_optim_states.pt... +14: [2023-05-25 13:38:03,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:03,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +23: [2023-05-25 13:38:03,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +23: [2023-05-25 13:38:03,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +31: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +31: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +12: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +27: [2023-05-25 13:38:03,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_14_optim_states.pt... +27: [2023-05-25 13:38:03,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_14_optim_states.pt... + 9: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +14: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +19: [2023-05-25 13:38:03,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +24: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_13_optim_states.pt... +22: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +24: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_13_optim_states.pt... +22: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +25: [2023-05-25 13:38:03,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +19: [2023-05-25 13:38:03,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +25: [2023-05-25 13:38:03,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +16: [2023-05-25 13:38:03,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +16: [2023-05-25 13:38:03,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +12: [2023-05-25 13:38:03,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +12: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +11: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +30: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_15_optim_states.pt... +30: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_15_optim_states.pt... + 9: [2023-05-25 13:38:03,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +31: [2023-05-25 13:38:03,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_14_optim_states.pt... +31: [2023-05-25 13:38:03,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_14_optim_states.pt... +18: [2023-05-25 13:38:03,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +18: [2023-05-25 13:38:03,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +11: [2023-05-25 13:38:03,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +16: [2023-05-25 13:38:03,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:03,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +30: [2023-05-25 13:38:03,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +30: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +12: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_04_optim_states.pt... +13: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. + 9: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +13: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +16: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +10: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +10: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +10: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +22: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +15: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +10: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +19: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +22: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +25: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +11: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +25: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +22: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +10: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +25: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +12: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_04_optim_states.pt... +19: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +23: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +16: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +17: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +11: [2023-05-25 13:38:03,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +17: [2023-05-25 13:38:03,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +16: [2023-05-25 13:38:03,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +16: [2023-05-25 13:38:03,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +20: [2023-05-25 13:38:03,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +16: [2023-05-25 13:38:03,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +23: [2023-05-25 13:38:03,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +30: [2023-05-25 13:38:03,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:03,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:03,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +23: [2023-05-25 13:38:03,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +30: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +20: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +30: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +10: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +20: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +15: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +20: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:03,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +13: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +11: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +17: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_04_optim_states.pt... +22: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_04_optim_states.pt... +30: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +30: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +23: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +16: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +23: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +23: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +17: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +16: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +12: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +20: [2023-05-25 13:38:03,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:03,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +18: [2023-05-25 13:38:03,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:03,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +21: [2023-05-25 13:38:03,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +21: [2023-05-25 13:38:03,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +12: [2023-05-25 13:38:03,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +18: [2023-05-25 13:38:03,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +13: [2023-05-25 13:38:03,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_04_optim_states.pt... +18: [2023-05-25 13:38:03,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +13: [2023-05-25 13:38:03,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_04_optim_states.pt... +20: [2023-05-25 13:38:03,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +20: [2023-05-25 13:38:03,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +12: [2023-05-25 13:38:03,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +18: [2023-05-25 13:38:03,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +15: [2023-05-25 13:38:03,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:03,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +20: [2023-05-25 13:38:03,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +12: [2023-05-25 13:38:03,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +12: [2023-05-25 13:38:03,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +18: [2023-05-25 13:38:03,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:03,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:03,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +18: [2023-05-25 13:38:03,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:03,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +16: [2023-05-25 13:38:03,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:03,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +15: [2023-05-25 13:38:03,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +16: [2023-05-25 13:38:03,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +11: [2023-05-25 13:38:03,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +28: [2023-05-25 13:38:03,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_13_optim_states.pt... +28: [2023-05-25 13:38:03,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_13_optim_states.pt... +10: [2023-05-25 13:38:03,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:03,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +18: [2023-05-25 13:38:03,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +16: [2023-05-25 13:38:03,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:03,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +10: [2023-05-25 13:38:03,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +16: [2023-05-25 13:38:03,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +18: [2023-05-25 13:38:03,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +25: [2023-05-25 13:38:03,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +25: [2023-05-25 13:38:03,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +10: [2023-05-25 13:38:03,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +17: [2023-05-25 13:38:03,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +25: [2023-05-25 13:38:03,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +11: [2023-05-25 13:38:03,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +25: [2023-05-25 13:38:03,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +13: [2023-05-25 13:38:03,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +13: [2023-05-25 13:38:03,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:03,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +17: [2023-05-25 13:38:03,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +12: [2023-05-25 13:38:03,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +21: [2023-05-25 13:38:03,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +12: [2023-05-25 13:38:03,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +10: [2023-05-25 13:38:03,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +13: [2023-05-25 13:38:03,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +13: [2023-05-25 13:38:03,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +15: [2023-05-25 13:38:03,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +17: [2023-05-25 13:38:03,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +17: [2023-05-25 13:38:03,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +21: [2023-05-25 13:38:03,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +14: [2023-05-25 13:38:03,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +14: [2023-05-25 13:38:03,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +14: [2023-05-25 13:38:03,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_04_optim_states.pt... +29: [2023-05-25 13:38:03,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +29: [2023-05-25 13:38:03,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. + 8: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +15: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +22: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +22: [2023-05-25 13:38:03,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +21: [2023-05-25 13:38:03,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +20: [2023-05-25 13:38:03,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +15: [2023-05-25 13:38:03,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +20: [2023-05-25 13:38:03,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +18: [2023-05-25 13:38:03,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +19: [2023-05-25 13:38:03,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +14: [2023-05-25 13:38:03,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_04_optim_states.pt... +19: [2023-05-25 13:38:03,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +15: [2023-05-25 13:38:03,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +21: [2023-05-25 13:38:03,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +18: [2023-05-25 13:38:03,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +15: [2023-05-25 13:38:03,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_04_optim_states.pt... +15: [2023-05-25 13:38:03,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_04_optim_states.pt... +12: [2023-05-25 13:38:03,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:03,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +21: [2023-05-25 13:38:03,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +10: [2023-05-25 13:38:03,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_04_optim_states.pt... +21: [2023-05-25 13:38:03,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +10: [2023-05-25 13:38:03,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_04_optim_states.pt... +12: [2023-05-25 13:38:03,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:03,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +21: [2023-05-25 13:38:03,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +14: [2023-05-25 13:38:03,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +29: [2023-05-25 13:38:03,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +22: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +10: [2023-05-25 13:38:03,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +22: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +20: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +19: [2023-05-25 13:38:03,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +20: [2023-05-25 13:38:03,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +29: [2023-05-25 13:38:03,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +19: [2023-05-25 13:38:03,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +13: [2023-05-25 13:38:03,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +29: [2023-05-25 13:38:03,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +13: [2023-05-25 13:38:03,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +15: [2023-05-25 13:38:03,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +10: [2023-05-25 13:38:03,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +10: [2023-05-25 13:38:03,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +15: [2023-05-25 13:38:03,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +11: [2023-05-25 13:38:03,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +15: [2023-05-25 13:38:03,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:03,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +29: [2023-05-25 13:38:03,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. + 9: [2023-05-25 13:38:03,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_04_optim_states.pt... + 9: [2023-05-25 13:38:03,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_04_optim_states.pt... +10: [2023-05-25 13:38:03,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +23: [2023-05-25 13:38:03,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +10: [2023-05-25 13:38:03,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +23: [2023-05-25 13:38:03,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +29: [2023-05-25 13:38:03,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +13: [2023-05-25 13:38:03,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:03,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:03,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +30: [2023-05-25 13:38:03,908] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_14_optim_states.pt... +10: [2023-05-25 13:38:03,908] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,908] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +20: [2023-05-25 13:38:03,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:03,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:03,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:03,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +20: [2023-05-25 13:38:03,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +14: [2023-05-25 13:38:03,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... + 8: [2023-05-25 13:38:03,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_04_optim_states.pt... +11: [2023-05-25 13:38:03,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_04_optim_states.pt... +14: [2023-05-25 13:38:03,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +22: [2023-05-25 13:38:03,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:03,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +14: [2023-05-25 13:38:03,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +23: [2023-05-25 13:38:03,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:03,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +22: [2023-05-25 13:38:03,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +20: [2023-05-25 13:38:03,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:03,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... + 9: [2023-05-25 13:38:03,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +30: [2023-05-25 13:38:03,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_14_optim_states.pt... + 9: [2023-05-25 13:38:03,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +20: [2023-05-25 13:38:03,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +19: [2023-05-25 13:38:03,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +14: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 9: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +14: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +19: [2023-05-25 13:38:03,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +15: [2023-05-25 13:38:03,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +15: [2023-05-25 13:38:03,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +19: [2023-05-25 13:38:03,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. +17: [2023-05-25 13:38:03,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +17: [2023-05-25 13:38:03,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +19: [2023-05-25 13:38:03,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +12: [2023-05-25 13:38:03,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +12: [2023-05-25 13:38:03,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +11: [2023-05-25 13:38:03,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +11: [2023-05-25 13:38:03,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +11: [2023-05-25 13:38:03,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +11: [2023-05-25 13:38:03,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +25: [2023-05-25 13:38:03,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_13_optim_states.pt... +25: [2023-05-25 13:38:03,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_13_optim_states.pt... +11: [2023-05-25 13:38:03,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:03,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:03,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +23: [2023-05-25 13:38:03,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +14: [2023-05-25 13:38:03,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +23: [2023-05-25 13:38:03,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +15: [2023-05-25 13:38:03,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:03,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +15: [2023-05-25 13:38:03,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 9: [2023-05-25 13:38:03,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +10: [2023-05-25 13:38:03,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +13: [2023-05-25 13:38:03,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +10: [2023-05-25 13:38:03,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. +17: [2023-05-25 13:38:03,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +17: [2023-05-25 13:38:03,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +10: [2023-05-25 13:38:03,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +11: [2023-05-25 13:38:03,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +23: [2023-05-25 13:38:03,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +10: [2023-05-25 13:38:03,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +23: [2023-05-25 13:38:03,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +10: [2023-05-25 13:38:03,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. +10: [2023-05-25 13:38:03,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. +15: [2023-05-25 13:38:03,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +15: [2023-05-25 13:38:03,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. +29: [2023-05-25 13:38:03,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_14_optim_states.pt... +29: [2023-05-25 13:38:03,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_14_optim_states.pt... + 4: [2023-05-25 13:38:03,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. +15: [2023-05-25 13:38:03,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +15: [2023-05-25 13:38:03,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +14: [2023-05-25 13:38:03,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +17: [2023-05-25 13:38:03,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +14: [2023-05-25 13:38:03,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. +17: [2023-05-25 13:38:03,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +11: [2023-05-25 13:38:03,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +17: [2023-05-25 13:38:03,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +17: [2023-05-25 13:38:03,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +11: [2023-05-25 13:38:03,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... + 6: [2023-05-25 13:38:03,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +13: [2023-05-25 13:38:03,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +13: [2023-05-25 13:38:03,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... +14: [2023-05-25 13:38:03,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 6: [2023-05-25 13:38:03,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +14: [2023-05-25 13:38:03,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +17: [2023-05-25 13:38:03,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +17: [2023-05-25 13:38:03,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 9: [2023-05-25 13:38:03,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 0: [2023-05-25 13:38:04,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. +11: [2023-05-25 13:38:04,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +11: [2023-05-25 13:38:04,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 5: [2023-05-25 13:38:04,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +13: [2023-05-25 13:38:04,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:04,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 4: [2023-05-25 13:38:04,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. +17: [2023-05-25 13:38:04,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +17: [2023-05-25 13:38:04,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 0: [2023-05-25 13:38:04,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 5: [2023-05-25 13:38:04,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +19: [2023-05-25 13:38:04,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +19: [2023-05-25 13:38:04,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +22: [2023-05-25 13:38:04,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +22: [2023-05-25 13:38:04,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 3: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 7: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 7: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 4: [2023-05-25 13:38:04,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 1: [2023-05-25 13:38:04,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 1: [2023-05-25 13:38:04,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +20: [2023-05-25 13:38:04,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +20: [2023-05-25 13:38:04,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 1: [2023-05-25 13:38:04,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. +21: [2023-05-25 13:38:04,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +18: [2023-05-25 13:38:04,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +21: [2023-05-25 13:38:04,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +18: [2023-05-25 13:38:04,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +22: [2023-05-25 13:38:04,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:04,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:04,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +22: [2023-05-25 13:38:04,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +17: [2023-05-25 13:38:04,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +13: [2023-05-25 13:38:04,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +13: [2023-05-25 13:38:04,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +20: [2023-05-25 13:38:04,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +20: [2023-05-25 13:38:04,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:04,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:04,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:04,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:04,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +17: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +17: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 5: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 7: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +13: [2023-05-25 13:38:04,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +13: [2023-05-25 13:38:04,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 5: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 5: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... +17: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 5: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 5: [2023-05-25 13:38:04,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 7: [2023-05-25 13:38:04,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 7: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 5: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +23: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... +23: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 3: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 1: [2023-05-25 13:38:04,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 3: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 5: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 3: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 2: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 2: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 0: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 0: [2023-05-25 13:38:04,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 0: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... +19: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:04,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +19: [2023-05-25 13:38:04,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 3: [2023-05-25 13:38:04,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 6: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. +22: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 4: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... +23: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +22: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 6: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 0: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... +23: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +22: [2023-05-25 13:38:04,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +14: [2023-05-25 13:38:04,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +19: [2023-05-25 13:38:04,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +19: [2023-05-25 13:38:04,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 3: [2023-05-25 13:38:04,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... +20: [2023-05-25 13:38:04,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 7: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... +14: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 0: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... +18: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +20: [2023-05-25 13:38:04,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +20: [2023-05-25 13:38:04,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +21: [2023-05-25 13:38:04,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +18: [2023-05-25 13:38:04,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 0: [2023-05-25 13:38:04,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... +20: [2023-05-25 13:38:04,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +18: [2023-05-25 13:38:04,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +21: [2023-05-25 13:38:04,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:04,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +21: [2023-05-25 13:38:04,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +21: [2023-05-25 13:38:04,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +10: [2023-05-25 13:38:04,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +10: [2023-05-25 13:38:04,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 1: [2023-05-25 13:38:04,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +29: [2023-05-25 13:38:04,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_12_optim_states.pt. +29: [2023-05-25 13:38:04,076] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 232 + 3: [2023-05-25 13:38:04,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 9: [2023-05-25 13:38:04,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 9: [2023-05-25 13:38:04,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 7: [2023-05-25 13:38:04,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +14: [2023-05-25 13:38:04,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +14: [2023-05-25 13:38:04,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +12: [2023-05-25 13:38:04,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +12: [2023-05-25 13:38:04,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 2: [2023-05-25 13:38:04,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +10: [2023-05-25 13:38:04,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:04,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 1: [2023-05-25 13:38:04,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... +23: [2023-05-25 13:38:04,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +23: [2023-05-25 13:38:04,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 9: [2023-05-25 13:38:04,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 9: [2023-05-25 13:38:04,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +23: [2023-05-25 13:38:04,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +23: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 1: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 1: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... +16: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 7: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +12: [2023-05-25 13:38:04,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +16: [2023-05-25 13:38:04,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +12: [2023-05-25 13:38:04,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:04,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:04,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... + 4: [2023-05-25 13:38:04,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +16: [2023-05-25 13:38:04,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +14: [2023-05-25 13:38:04,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:04,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +11: [2023-05-25 13:38:04,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +16: [2023-05-25 13:38:04,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 9: [2023-05-25 13:38:04,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:04,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:04,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +14: [2023-05-25 13:38:04,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +14: [2023-05-25 13:38:04,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... + 6: [2023-05-25 13:38:04,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... +10: [2023-05-25 13:38:04,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:04,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:04,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +13: [2023-05-25 13:38:04,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +11: [2023-05-25 13:38:04,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:04,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 9: [2023-05-25 13:38:04,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +11: [2023-05-25 13:38:04,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:04,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:04,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +12: [2023-05-25 13:38:04,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +22: [2023-05-25 13:38:04,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +12: [2023-05-25 13:38:04,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +13: [2023-05-25 13:38:04,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:04,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +16: [2023-05-25 13:38:04,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:04,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +16: [2023-05-25 13:38:04,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +11: [2023-05-25 13:38:04,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:04,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:04,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +16: [2023-05-25 13:38:04,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +22: [2023-05-25 13:38:04,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:04,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +11: [2023-05-25 13:38:04,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:04,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +21: [2023-05-25 13:38:04,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +21: [2023-05-25 13:38:04,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +19: [2023-05-25 13:38:04,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:04,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:04,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:04,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +13: [2023-05-25 13:38:04,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:04,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +21: [2023-05-25 13:38:04,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +13: [2023-05-25 13:38:04,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +13: [2023-05-25 13:38:04,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +16: [2023-05-25 13:38:04,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +16: [2023-05-25 13:38:04,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +18: [2023-05-25 13:38:04,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +18: [2023-05-25 13:38:04,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +23: [2023-05-25 13:38:04,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +23: [2023-05-25 13:38:04,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +16: [2023-05-25 13:38:04,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:04,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +20: [2023-05-25 13:38:04,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +16: [2023-05-25 13:38:04,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:04,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +15: [2023-05-25 13:38:04,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +23: [2023-05-25 13:38:04,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +21: [2023-05-25 13:38:04,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +23: [2023-05-25 13:38:04,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +23: [2023-05-25 13:38:04,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:04,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:04,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +18: [2023-05-25 13:38:04,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +20: [2023-05-25 13:38:04,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:04,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +19: [2023-05-25 13:38:04,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +20: [2023-05-25 13:38:04,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:04,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +18: [2023-05-25 13:38:04,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:04,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:04,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +17: [2023-05-25 13:38:04,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +20: [2023-05-25 13:38:04,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +20: [2023-05-25 13:38:04,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +15: [2023-05-25 13:38:04,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +23: [2023-05-25 13:38:04,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +23: [2023-05-25 13:38:04,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +17: [2023-05-25 13:38:04,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +17: [2023-05-25 13:38:04,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +15: [2023-05-25 13:38:04,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +19: [2023-05-25 13:38:04,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_08_optim_states.pt... +22: [2023-05-25 13:38:04,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +19: [2023-05-25 13:38:04,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_08_optim_states.pt... +22: [2023-05-25 13:38:04,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +17: [2023-05-25 13:38:04,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_08_optim_states.pt... +17: [2023-05-25 13:38:04,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_08_optim_states.pt... +20: [2023-05-25 13:38:04,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +20: [2023-05-25 13:38:04,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +16: [2023-05-25 13:38:04,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +23: [2023-05-25 13:38:04,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +17: [2023-05-25 13:38:04,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +19: [2023-05-25 13:38:04,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +11: [2023-05-25 13:38:04,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +11: [2023-05-25 13:38:04,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +17: [2023-05-25 13:38:04,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:04,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +16: [2023-05-25 13:38:04,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:04,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +18: [2023-05-25 13:38:04,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:04,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:04,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +20: [2023-05-25 13:38:04,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +23: [2023-05-25 13:38:04,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +23: [2023-05-25 13:38:04,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +18: [2023-05-25 13:38:04,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +23: [2023-05-25 13:38:04,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:04,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +21: [2023-05-25 13:38:04,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +21: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +21: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +21: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +20: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:04,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:04,246] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +19: [2023-05-25 13:38:04,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +19: [2023-05-25 13:38:04,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:04,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +23: [2023-05-25 13:38:04,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_08_optim_states.pt... +23: [2023-05-25 13:38:04,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_08_optim_states.pt... + 8: [2023-05-25 13:38:04,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +18: [2023-05-25 13:38:04,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +17: [2023-05-25 13:38:04,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +17: [2023-05-25 13:38:04,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +15: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +22: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +17: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +22: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +25: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_12_optim_states.pt. +22: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +25: [2023-05-25 13:38:04,272] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 204 +22: [2023-05-25 13:38:04,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:04,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +21: [2023-05-25 13:38:04,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. + 8: [2023-05-25 13:38:04,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +15: [2023-05-25 13:38:04,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +16: [2023-05-25 13:38:04,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +20: [2023-05-25 13:38:04,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +17: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +22: [2023-05-25 13:38:04,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +22: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +17: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +23: [2023-05-25 13:38:04,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +16: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +23: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +23: [2023-05-25 13:38:04,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +19: [2023-05-25 13:38:04,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +19: [2023-05-25 13:38:04,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +12: [2023-05-25 13:38:04,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +12: [2023-05-25 13:38:04,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +12: [2023-05-25 13:38:04,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +12: [2023-05-25 13:38:04,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +22: [2023-05-25 13:38:04,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +15: [2023-05-25 13:38:04,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_07_optim_states.pt... +15: [2023-05-25 13:38:04,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_07_optim_states.pt... +11: [2023-05-25 13:38:04,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_07_optim_states.pt... +11: [2023-05-25 13:38:04,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_07_optim_states.pt... +20: [2023-05-25 13:38:04,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_08_optim_states.pt... +21: [2023-05-25 13:38:04,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_08_optim_states.pt... + 9: [2023-05-25 13:38:04,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 9: [2023-05-25 13:38:04,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 8: [2023-05-25 13:38:04,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +20: [2023-05-25 13:38:04,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_08_optim_states.pt... +16: [2023-05-25 13:38:04,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_08_optim_states.pt... +16: [2023-05-25 13:38:04,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_08_optim_states.pt... +18: [2023-05-25 13:38:04,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_08_optim_states.pt... +18: [2023-05-25 13:38:04,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_08_optim_states.pt... +21: [2023-05-25 13:38:04,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_08_optim_states.pt... + 8: [2023-05-25 13:38:04,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... + 8: [2023-05-25 13:38:04,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +13: [2023-05-25 13:38:04,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +13: [2023-05-25 13:38:04,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +14: [2023-05-25 13:38:04,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +14: [2023-05-25 13:38:04,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 8: [2023-05-25 13:38:04,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +10: [2023-05-25 13:38:04,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +10: [2023-05-25 13:38:04,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 9: [2023-05-25 13:38:04,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. + 8: [2023-05-25 13:38:04,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_05_optim_states.pt... + 8: [2023-05-25 13:38:04,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_05_optim_states.pt... +22: [2023-05-25 13:38:04,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_08_optim_states.pt... +22: [2023-05-25 13:38:04,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_08_optim_states.pt... + 9: [2023-05-25 13:38:04,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +27: [2023-05-25 13:38:04,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_12_optim_states.pt. +11: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +11: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +13: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +13: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +10: [2023-05-25 13:38:04,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +10: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +14: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +14: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. + 5: [2023-05-25 13:38:04,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +13: [2023-05-25 13:38:04,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_07_optim_states.pt... +15: [2023-05-25 13:38:04,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +15: [2023-05-25 13:38:04,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. + 5: [2023-05-25 13:38:04,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +13: [2023-05-25 13:38:04,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_07_optim_states.pt... +14: [2023-05-25 13:38:04,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_07_optim_states.pt... +14: [2023-05-25 13:38:04,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_07_optim_states.pt... + 6: [2023-05-25 13:38:04,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +12: [2023-05-25 13:38:04,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_05_optim_states.pt... +12: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_07_optim_states.pt... +12: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_07_optim_states.pt... +12: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_05_optim_states.pt... + 6: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 9: [2023-05-25 13:38:04,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_07_optim_states.pt... + 9: [2023-05-25 13:38:04,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_07_optim_states.pt... +31: [2023-05-25 13:38:04,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_12_optim_states.pt. +31: [2023-05-25 13:38:04,346] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 248 +27: [2023-05-25 13:38:04,333] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 220 + 4: [2023-05-25 13:38:04,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 8: [2023-05-25 13:38:04,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_07_optim_states.pt... + 8: [2023-05-25 13:38:04,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_07_optim_states.pt... + 2: [2023-05-25 13:38:04,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 9: [2023-05-25 13:38:04,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_05_optim_states.pt... + 9: [2023-05-25 13:38:04,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_05_optim_states.pt... + 1: [2023-05-25 13:38:04,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +10: [2023-05-25 13:38:04,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_07_optim_states.pt... +10: [2023-05-25 13:38:04,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_07_optim_states.pt... +10: [2023-05-25 13:38:04,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_05_optim_states.pt... +10: [2023-05-25 13:38:04,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_05_optim_states.pt... + 7: [2023-05-25 13:38:04,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +14: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_05_optim_states.pt... +14: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_05_optim_states.pt... +15: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_05_optim_states.pt... +15: [2023-05-25 13:38:04,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_05_optim_states.pt... + 6: [2023-05-25 13:38:04,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... + 2: [2023-05-25 13:38:04,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 3: [2023-05-25 13:38:04,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 7: [2023-05-25 13:38:04,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +27: [2023-05-25 13:38:04,374] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 220 + 6: [2023-05-25 13:38:04,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... + 3: [2023-05-25 13:38:04,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +21: [2023-05-25 13:38:04,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +21: [2023-05-25 13:38:04,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... + 3: [2023-05-25 13:38:04,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... + 7: [2023-05-25 13:38:04,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +25: [2023-05-25 13:38:04,386] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 204 + 7: [2023-05-25 13:38:04,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 4: [2023-05-25 13:38:04,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 9: [2023-05-25 13:38:04,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 9: [2023-05-25 13:38:04,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +13: [2023-05-25 13:38:04,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_05_optim_states.pt... +13: [2023-05-25 13:38:04,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_05_optim_states.pt... + 1: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... + 1: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +21: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... + 2: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +31: [2023-05-25 13:38:04,394] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 248 +21: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... + 7: [2023-05-25 13:38:04,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... + 3: [2023-05-25 13:38:04,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +11: [2023-05-25 13:38:04,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_05_optim_states.pt... +11: [2023-05-25 13:38:04,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_05_optim_states.pt... + 5: [2023-05-25 13:38:04,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 5: [2023-05-25 13:38:04,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... + 5: [2023-05-25 13:38:04,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... + 5: [2023-05-25 13:38:04,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... +27: [2023-05-25 13:38:04,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_12_optim_states.pt. + 9: [2023-05-25 13:38:04,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_06_optim_states.pt... + 9: [2023-05-25 13:38:04,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_06_optim_states.pt... + 2: [2023-05-25 13:38:04,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... +29: [2023-05-25 13:38:04,411] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 232 + 0: > overriding learning rate value to 0.0002 + 0: > overriding minimum learning rate value to 2e-05 + 0: > overriding warmup iterations value to 0 + 0: > overriding total number of iterations value to 1 + 0: > overriding decay style value to cosine +25: [2023-05-25 13:38:04,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_15_optim_states.pt. +25: [2023-05-25 13:38:04,414] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 203 + 7: [2023-05-25 13:38:04,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +11: [2023-05-25 13:38:04,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +11: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... + 7: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... + 0: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... + 4: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 4: [2023-05-25 13:38:04,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. +27: [2023-05-25 13:38:04,408] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 216 + 7: [2023-05-25 13:38:04,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... +22: [2023-05-25 13:38:04,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +22: [2023-05-25 13:38:04,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +14: [2023-05-25 13:38:04,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +14: [2023-05-25 13:38:04,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +21: [2023-05-25 13:38:04,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +27: [2023-05-25 13:38:04,420] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 216 +12: [2023-05-25 13:38:04,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +12: [2023-05-25 13:38:04,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +21: [2023-05-25 13:38:04,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +22: [2023-05-25 13:38:04,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +21: [2023-05-25 13:38:04,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +21: [2023-05-25 13:38:04,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +22: [2023-05-25 13:38:04,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 3: [2023-05-25 13:38:04,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 3: [2023-05-25 13:38:04,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 5: [2023-05-25 13:38:04,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 6: [2023-05-25 13:38:04,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 6: [2023-05-25 13:38:04,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 1: [2023-05-25 13:38:04,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 1: [2023-05-25 13:38:04,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 1: [2023-05-25 13:38:04,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. +17: [2023-05-25 13:38:04,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +17: [2023-05-25 13:38:04,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. +10: [2023-05-25 13:38:04,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +13: [2023-05-25 13:38:04,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +13: [2023-05-25 13:38:04,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +31: [2023-05-25 13:38:04,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_13_optim_states.pt. +31: [2023-05-25 13:38:04,459] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 253 + 5: [2023-05-25 13:38:04,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 5: [2023-05-25 13:38:04,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. +22: [2023-05-25 13:38:04,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +17: [2023-05-25 13:38:04,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 1: [2023-05-25 13:38:04,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +28: [2023-05-25 13:38:04,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_12_optim_states.pt. +28: [2023-05-25 13:38:04,464] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 228 +17: [2023-05-25 13:38:04,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +30: [2023-05-25 13:38:04,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_12_optim_states.pt. +30: [2023-05-25 13:38:04,464] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 244 + 4: [2023-05-25 13:38:04,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 1: [2023-05-25 13:38:04,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +18: [2023-05-25 13:38:04,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +22: [2023-05-25 13:38:04,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +18: [2023-05-25 13:38:04,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 2: [2023-05-25 13:38:04,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +22: [2023-05-25 13:38:04,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 5: [2023-05-25 13:38:04,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +30: [2023-05-25 13:38:04,475] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 244 +28: [2023-05-25 13:38:04,476] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 228 + 0: [2023-05-25 13:38:04,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +23: [2023-05-25 13:38:04,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +23: [2023-05-25 13:38:04,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. +18: [2023-05-25 13:38:04,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. +16: [2023-05-25 13:38:04,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +16: [2023-05-25 13:38:04,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +18: [2023-05-25 13:38:04,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 2: [2023-05-25 13:38:04,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +20: [2023-05-25 13:38:04,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +20: [2023-05-25 13:38:04,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 6: [2023-05-25 13:38:04,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 3: [2023-05-25 13:38:04,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. +23: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +17: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. +23: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 3: [2023-05-25 13:38:04,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 3: [2023-05-25 13:38:04,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... +10: [2023-05-25 13:38:04,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 7: [2023-05-25 13:38:04,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +17: [2023-05-25 13:38:04,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +16: [2023-05-25 13:38:04,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 7: [2023-05-25 13:38:04,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +15: [2023-05-25 13:38:04,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +15: [2023-05-25 13:38:04,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 1: [2023-05-25 13:38:04,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +20: [2023-05-25 13:38:04,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:04,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +14: [2023-05-25 13:38:04,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_06_optim_states.pt... + 1: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 1: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 6: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 8: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 3: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... +17: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 5: [2023-05-25 13:38:04,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 3: [2023-05-25 13:38:04,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. +14: [2023-05-25 13:38:04,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_06_optim_states.pt... + 0: [2023-05-25 13:38:04,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 0: [2023-05-25 13:38:04,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +11: [2023-05-25 13:38:04,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_06_optim_states.pt... +11: [2023-05-25 13:38:04,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_06_optim_states.pt... +18: [2023-05-25 13:38:04,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 5: [2023-05-25 13:38:04,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +18: [2023-05-25 13:38:04,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 3: [2023-05-25 13:38:04,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +18: [2023-05-25 13:38:04,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 6: [2023-05-25 13:38:04,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 2: [2023-05-25 13:38:04,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +18: [2023-05-25 13:38:04,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 7: [2023-05-25 13:38:04,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 5: [2023-05-25 13:38:04,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 5: [2023-05-25 13:38:04,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... +23: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +16: [2023-05-25 13:38:04,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +23: [2023-05-25 13:38:04,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 7: [2023-05-25 13:38:04,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... +28: [2023-05-25 13:38:04,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_14_optim_states.pt. +28: [2023-05-25 13:38:04,522] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 226 + 0: [2023-05-25 13:38:04,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... +16: [2023-05-25 13:38:04,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +16: [2023-05-25 13:38:04,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. +20: [2023-05-25 13:38:04,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +20: [2023-05-25 13:38:04,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +19: [2023-05-25 13:38:04,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +20: [2023-05-25 13:38:04,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +20: [2023-05-25 13:38:04,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +19: [2023-05-25 13:38:04,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 1: [2023-05-25 13:38:04,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 2: [2023-05-25 13:38:04,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 1: [2023-05-25 13:38:04,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 7: [2023-05-25 13:38:04,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 4: [2023-05-25 13:38:04,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 3: [2023-05-25 13:38:04,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +19: [2023-05-25 13:38:04,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +31: [2023-05-25 13:38:04,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_15_optim_states.pt. +31: [2023-05-25 13:38:04,550] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 251 + 1: [2023-05-25 13:38:04,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 1: [2023-05-25 13:38:04,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 0: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +10: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_06_optim_states.pt... +10: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_06_optim_states.pt... +12: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_06_optim_states.pt... +12: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_06_optim_states.pt... + 7: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 0: [2023-05-25 13:38:04,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +12: [2023-05-25 13:38:04,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_04_optim_states.pt. +12: [2023-05-25 13:38:04,563] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 100 +23: [2023-05-25 13:38:04,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +23: [2023-05-25 13:38:04,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. + 6: [2023-05-25 13:38:04,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +15: [2023-05-25 13:38:04,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_06_optim_states.pt... +15: [2023-05-25 13:38:04,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_06_optim_states.pt... +13: [2023-05-25 13:38:04,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_06_optim_states.pt... +13: [2023-05-25 13:38:04,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_06_optim_states.pt... + 6: [2023-05-25 13:38:04,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +19: [2023-05-25 13:38:04,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +29: [2023-05-25 13:38:04,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_12_optim_states.pt. +29: [2023-05-25 13:38:04,574] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 236 +19: [2023-05-25 13:38:04,575] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +12: [2023-05-25 13:38:04,576] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 100 +19: [2023-05-25 13:38:04,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +19: [2023-05-25 13:38:04,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 6: [2023-05-25 13:38:04,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +16: [2023-05-25 13:38:04,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +16: [2023-05-25 13:38:04,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 7: [2023-05-25 13:38:04,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +18: [2023-05-25 13:38:04,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. + 7: [2023-05-25 13:38:04,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +29: [2023-05-25 13:38:04,585] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 236 +29: [2023-05-25 13:38:04,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_13_optim_states.pt. +29: [2023-05-25 13:38:04,590] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 237 +18: [2023-05-25 13:38:04,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +27: [2023-05-25 13:38:04,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_13_optim_states.pt. +27: [2023-05-25 13:38:04,592] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 221 +28: [2023-05-25 13:38:04,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_15_optim_states.pt. +28: [2023-05-25 13:38:04,603] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 227 +16: [2023-05-25 13:38:04,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +16: [2023-05-25 13:38:04,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. + 8: [2023-05-25 13:38:04,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_06_optim_states.pt... + 8: [2023-05-25 13:38:04,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_06_optim_states.pt... +17: [2023-05-25 13:38:04,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +24: [2023-05-25 13:38:04,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_14_optim_states.pt. +24: [2023-05-25 13:38:04,620] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 194 +17: [2023-05-25 13:38:04,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +21: [2023-05-25 13:38:04,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +21: [2023-05-25 13:38:04,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +24: [2023-05-25 13:38:04,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_12_optim_states.pt. +24: [2023-05-25 13:38:04,622] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 196 +18: [2023-05-25 13:38:04,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +18: [2023-05-25 13:38:04,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +16: [2023-05-25 13:38:04,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_10_optim_states.pt... +16: [2023-05-25 13:38:04,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_10_optim_states.pt... +31: [2023-05-25 13:38:04,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_12_optim_states.pt. +31: [2023-05-25 13:38:04,625] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 252 +16: [2023-05-25 13:38:04,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_09_optim_states.pt... +16: [2023-05-25 13:38:04,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_09_optim_states.pt... +19: [2023-05-25 13:38:04,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +19: [2023-05-25 13:38:04,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +17: [2023-05-25 13:38:04,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +17: [2023-05-25 13:38:04,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +21: [2023-05-25 13:38:04,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +21: [2023-05-25 13:38:04,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +20: [2023-05-25 13:38:04,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +13: [2023-05-25 13:38:04,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_04_optim_states.pt. +13: [2023-05-25 13:38:04,633] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 108 +20: [2023-05-25 13:38:04,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +21: [2023-05-25 13:38:04,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_09_optim_states.pt... +21: [2023-05-25 13:38:04,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_09_optim_states.pt... +20: [2023-05-25 13:38:04,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +20: [2023-05-25 13:38:04,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +31: [2023-05-25 13:38:04,635] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 252 +26: [2023-05-25 13:38:04,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_12_optim_states.pt. +26: [2023-05-25 13:38:04,636] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 208 +24: [2023-05-25 13:38:04,636] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 196 +25: [2023-05-25 13:38:04,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_12_optim_states.pt. +22: [2023-05-25 13:38:04,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +25: [2023-05-25 13:38:04,639] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 200 +22: [2023-05-25 13:38:04,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +22: [2023-05-25 13:38:04,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +22: [2023-05-25 13:38:04,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +31: [2023-05-25 13:38:04,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_14_optim_states.pt. +31: [2023-05-25 13:38:04,644] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 254 +30: [2023-05-25 13:38:04,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_12_optim_states.pt. +30: [2023-05-25 13:38:04,646] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 240 +23: [2023-05-25 13:38:04,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_09_optim_states.pt... +23: [2023-05-25 13:38:04,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_09_optim_states.pt... +13: [2023-05-25 13:38:04,647] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 108 +18: [2023-05-25 13:38:04,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_09_optim_states.pt... +18: [2023-05-25 13:38:04,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_09_optim_states.pt... +26: [2023-05-25 13:38:04,648] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 208 +17: [2023-05-25 13:38:04,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_09_optim_states.pt... +17: [2023-05-25 13:38:04,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_09_optim_states.pt... +19: [2023-05-25 13:38:04,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +19: [2023-05-25 13:38:04,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +25: [2023-05-25 13:38:04,652] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 200 +19: [2023-05-25 13:38:04,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_10_optim_states.pt... +19: [2023-05-25 13:38:04,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_10_optim_states.pt... +17: [2023-05-25 13:38:04,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_10_optim_states.pt... +17: [2023-05-25 13:38:04,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_10_optim_states.pt... +20: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_09_optim_states.pt... +20: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_09_optim_states.pt... +24: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_12_optim_states.pt. +14: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_04_optim_states.pt. +24: [2023-05-25 13:38:04,654] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 192 +18: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_10_optim_states.pt... +18: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_10_optim_states.pt... +14: [2023-05-25 13:38:04,654] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 112 +20: [2023-05-25 13:38:04,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_10_optim_states.pt... +20: [2023-05-25 13:38:04,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_10_optim_states.pt... +30: [2023-05-25 13:38:04,658] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 240 +22: [2023-05-25 13:38:04,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_09_optim_states.pt... +22: [2023-05-25 13:38:04,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_09_optim_states.pt... +14: [2023-05-25 13:38:04,666] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 112 +24: [2023-05-25 13:38:04,667] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 192 +31: [2023-05-25 13:38:04,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_14_optim_states.pt. +31: [2023-05-25 13:38:04,668] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 250 +26: [2023-05-25 13:38:04,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_14_optim_states.pt. +26: [2023-05-25 13:38:04,674] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 214 +26: [2023-05-25 13:38:04,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_15_optim_states.pt. +26: [2023-05-25 13:38:04,677] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 211 +23: [2023-05-25 13:38:04,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +23: [2023-05-25 13:38:04,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +28: [2023-05-25 13:38:04,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_15_optim_states.pt. +28: [2023-05-25 13:38:04,691] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 231 + 2: [2023-05-25 13:38:04,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +24: [2023-05-25 13:38:04,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_13_optim_states.pt. +24: [2023-05-25 13:38:04,697] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 197 + 2: [2023-05-25 13:38:04,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +11: [2023-05-25 13:38:04,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_04_optim_states.pt. +11: [2023-05-25 13:38:04,701] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 92 +19: [2023-05-25 13:38:04,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_09_optim_states.pt... +19: [2023-05-25 13:38:04,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_09_optim_states.pt... +21: [2023-05-25 13:38:04,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_10_optim_states.pt... +21: [2023-05-25 13:38:04,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_10_optim_states.pt... + 7: [2023-05-25 13:38:04,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 7: [2023-05-25 13:38:04,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +11: [2023-05-25 13:38:04,716] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 92 +22: [2023-05-25 13:38:04,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_10_optim_states.pt... +22: [2023-05-25 13:38:04,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_10_optim_states.pt... +26: [2023-05-25 13:38:04,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_12_optim_states.pt. +26: [2023-05-25 13:38:04,719] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 212 + 3: [2023-05-25 13:38:04,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 3: [2023-05-25 13:38:04,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +26: [2023-05-25 13:38:04,732] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 212 +27: [2023-05-25 13:38:04,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_13_optim_states.pt. +27: [2023-05-25 13:38:04,735] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 217 + 7: [2023-05-25 13:38:04,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_01_optim_states.pt... + 7: [2023-05-25 13:38:04,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_01_optim_states.pt... +25: [2023-05-25 13:38:04,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_13_optim_states.pt. +25: [2023-05-25 13:38:04,742] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 201 +31: [2023-05-25 13:38:04,746] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 253 + 6: [2023-05-25 13:38:04,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 6: [2023-05-25 13:38:04,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 4: [2023-05-25 13:38:04,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_01_optim_states.pt... + 2: [2023-05-25 13:38:04,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_01_optim_states.pt... + 4: [2023-05-25 13:38:04,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +30: [2023-05-25 13:38:04,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_14_optim_states.pt. +30: [2023-05-25 13:38:04,762] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 246 +25: [2023-05-25 13:38:04,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_14_optim_states.pt. +25: [2023-05-25 13:38:04,763] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 202 +28: [2023-05-25 13:38:04,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_12_optim_states.pt. +28: [2023-05-25 13:38:04,764] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 224 +27: [2023-05-25 13:38:04,764] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 217 +25: [2023-05-25 13:38:04,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_14_optim_states.pt. +25: [2023-05-25 13:38:04,767] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 206 +27: [2023-05-25 13:38:04,764] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 221 +21: [2023-05-25 13:38:04,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +27: [2023-05-25 13:38:04,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_15_optim_states.pt. +27: [2023-05-25 13:38:04,770] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 219 +21: [2023-05-25 13:38:04,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 5: [2023-05-25 13:38:04,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +28: [2023-05-25 13:38:04,776] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 224 + 0: [2023-05-25 13:38:04,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 0: [2023-05-25 13:38:04,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 8: [2023-05-25 13:38:04,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_04_optim_states.pt. + 8: [2023-05-25 13:38:04,780] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 64 + 3: [2023-05-25 13:38:04,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 3: [2023-05-25 13:38:04,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. +30: [2023-05-25 13:38:04,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_15_optim_states.pt. +30: [2023-05-25 13:38:04,780] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 247 +12: [2023-05-25 13:38:04,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_04_optim_states.pt. +12: [2023-05-25 13:38:04,782] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 96 +29: [2023-05-25 13:38:04,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_15_optim_states.pt. +29: [2023-05-25 13:38:04,782] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 235 +23: [2023-05-25 13:38:04,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_10_optim_states.pt... +23: [2023-05-25 13:38:04,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_10_optim_states.pt... +27: [2023-05-25 13:38:04,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_15_optim_states.pt. +27: [2023-05-25 13:38:04,786] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 223 +26: [2023-05-25 13:38:04,789] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 211 +30: [2023-05-25 13:38:04,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_13_optim_states.pt. +30: [2023-05-25 13:38:04,791] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 241 + 2: [2023-05-25 13:38:04,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_03_optim_states.pt... + 2: [2023-05-25 13:38:04,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_03_optim_states.pt... + 4: [2023-05-25 13:38:04,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_01_optim_states.pt... + 4: [2023-05-25 13:38:04,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_01_optim_states.pt... + 1: [2023-05-25 13:38:04,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +12: [2023-05-25 13:38:04,794] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 96 + 8: [2023-05-25 13:38:04,795] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 64 + 3: [2023-05-25 13:38:04,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_01_optim_states.pt... + 3: [2023-05-25 13:38:04,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_01_optim_states.pt... + 1: [2023-05-25 13:38:04,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +24: [2023-05-25 13:38:04,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_15_optim_states.pt. +24: [2023-05-25 13:38:04,796] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 199 +29: [2023-05-25 13:38:04,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_13_optim_states.pt. +29: [2023-05-25 13:38:04,797] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 233 + 6: [2023-05-25 13:38:04,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_01_optim_states.pt... + 6: [2023-05-25 13:38:04,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_01_optim_states.pt... +22: [2023-05-25 13:38:04,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +22: [2023-05-25 13:38:04,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 3: [2023-05-25 13:38:04,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 3: [2023-05-25 13:38:04,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +30: [2023-05-25 13:38:04,803] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 241 +29: [2023-05-25 13:38:04,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_15_optim_states.pt. +29: [2023-05-25 13:38:04,804] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 239 +29: [2023-05-25 13:38:04,804] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 237 +25: [2023-05-25 13:38:04,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_15_optim_states.pt. +25: [2023-05-25 13:38:04,805] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 207 + 7: [2023-05-25 13:38:04,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 7: [2023-05-25 13:38:04,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 3: [2023-05-25 13:38:04,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_03_optim_states.pt... + 3: [2023-05-25 13:38:04,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_03_optim_states.pt... +21: [2023-05-25 13:38:04,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_11_optim_states.pt... +29: [2023-05-25 13:38:04,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_14_optim_states.pt. +29: [2023-05-25 13:38:04,807] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 234 +29: [2023-05-25 13:38:04,808] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 233 + 0: [2023-05-25 13:38:04,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_03_optim_states.pt... + 0: [2023-05-25 13:38:04,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_03_optim_states.pt... +27: [2023-05-25 13:38:04,810] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 223 +28: [2023-05-25 13:38:04,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_14_optim_states.pt. +28: [2023-05-25 13:38:04,811] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 230 +24: [2023-05-25 13:38:04,812] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 199 +27: [2023-05-25 13:38:04,810] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 219 +24: [2023-05-25 13:38:04,813] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 197 +21: [2023-05-25 13:38:04,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_11_optim_states.pt... + 0: [2023-05-25 13:38:04,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt... + 0: [2023-05-25 13:38:04,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_01_optim_states.pt... + 4: [2023-05-25 13:38:04,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 0: [2023-05-25 13:38:04,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +26: [2023-05-25 13:38:04,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_14_optim_states.pt. +26: [2023-05-25 13:38:04,822] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 210 +23: [2023-05-25 13:38:04,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. +23: [2023-05-25 13:38:04,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. +13: [2023-05-25 13:38:04,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_04_optim_states.pt. +13: [2023-05-25 13:38:04,829] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 104 +18: [2023-05-25 13:38:04,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +18: [2023-05-25 13:38:04,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +25: [2023-05-25 13:38:04,831] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 201 +28: [2023-05-25 13:38:04,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_13_optim_states.pt. +28: [2023-05-25 13:38:04,837] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 229 +31: [2023-05-25 13:38:04,840] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 250 +24: [2023-05-25 13:38:04,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_15_optim_states.pt. +24: [2023-05-25 13:38:04,841] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 195 +13: [2023-05-25 13:38:04,843] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 104 +31: [2023-05-25 13:38:04,844] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 254 + 5: [2023-05-25 13:38:04,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +10: [2023-05-25 13:38:04,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_04_optim_states.pt. +10: [2023-05-25 13:38:04,836] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 80 + 5: [2023-05-25 13:38:04,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 3: [2023-05-25 13:38:04,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_02_optim_states.pt... + 3: [2023-05-25 13:38:04,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_02_optim_states.pt... + 1: [2023-05-25 13:38:04,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_01_optim_states.pt... + 1: [2023-05-25 13:38:04,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_01_optim_states.pt... + 1: [2023-05-25 13:38:04,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 1: [2023-05-25 13:38:04,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +16: [2023-05-25 13:38:04,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +16: [2023-05-25 13:38:04,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +24: [2023-05-25 13:38:04,848] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 194 +28: [2023-05-25 13:38:04,849] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 231 +28: [2023-05-25 13:38:04,851] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 229 +30: [2023-05-25 13:38:04,851] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 246 +19: [2023-05-25 13:38:04,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt... + 5: [2023-05-25 13:38:04,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt... + 6: [2023-05-25 13:38:04,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_02_optim_states.pt... + 6: [2023-05-25 13:38:04,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_02_optim_states.pt... +28: [2023-05-25 13:38:04,853] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 227 +15: [2023-05-25 13:38:04,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_04_optim_states.pt. +15: [2023-05-25 13:38:04,853] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 124 +10: [2023-05-25 13:38:04,853] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 80 +17: [2023-05-25 13:38:04,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +17: [2023-05-25 13:38:04,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +24: [2023-05-25 13:38:04,855] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 195 + 0: [2023-05-25 13:38:04,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_02_optim_states.pt... + 0: [2023-05-25 13:38:04,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_02_optim_states.pt... + 4: [2023-05-25 13:38:04,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_03_optim_states.pt... +29: [2023-05-25 13:38:04,857] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 239 + 4: [2023-05-25 13:38:04,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_03_optim_states.pt... +19: [2023-05-25 13:38:04,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_03_optim_states.pt... + 5: [2023-05-25 13:38:04,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_03_optim_states.pt... +29: [2023-05-25 13:38:04,858] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 235 +28: [2023-05-25 13:38:04,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_13_optim_states.pt. +28: [2023-05-25 13:38:04,860] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 225 +20: [2023-05-25 13:38:04,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +25: [2023-05-25 13:38:04,861] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 203 +25: [2023-05-25 13:38:04,863] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 207 +31: [2023-05-25 13:38:04,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_13_optim_states.pt. +31: [2023-05-25 13:38:04,864] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 249 +15: [2023-05-25 13:38:04,865] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 124 + 7: [2023-05-25 13:38:04,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 7: [2023-05-25 13:38:04,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. +20: [2023-05-25 13:38:04,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +30: [2023-05-25 13:38:04,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_13_optim_states.pt. +30: [2023-05-25 13:38:04,869] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 245 +28: [2023-05-25 13:38:04,871] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 225 + 2: [2023-05-25 13:38:04,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 2: [2023-05-25 13:38:04,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +29: [2023-05-25 13:38:04,875] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 234 + 4: [2023-05-25 13:38:04,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +31: [2023-05-25 13:38:04,876] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 249 +24: [2023-05-25 13:38:04,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_13_optim_states.pt. +24: [2023-05-25 13:38:04,878] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 193 +29: [2023-05-25 13:38:04,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_14_optim_states.pt. +29: [2023-05-25 13:38:04,881] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 238 +25: [2023-05-25 13:38:04,881] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 202 +30: [2023-05-25 13:38:04,881] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 245 + 1: [2023-05-25 13:38:04,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_03_optim_states.pt... + 1: [2023-05-25 13:38:04,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_03_optim_states.pt... +24: [2023-05-25 13:38:04,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_14_optim_states.pt. +24: [2023-05-25 13:38:04,884] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 198 +25: [2023-05-25 13:38:04,884] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 206 + 5: [2023-05-25 13:38:04,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_02_optim_states.pt... + 5: [2023-05-25 13:38:04,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_02_optim_states.pt... +24: [2023-05-25 13:38:04,890] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 193 + 4: [2023-05-25 13:38:04,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +29: [2023-05-25 13:38:04,895] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 238 + 6: [2023-05-25 13:38:04,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. +26: [2023-05-25 13:38:04,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_13_optim_states.pt. +26: [2023-05-25 13:38:04,896] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 209 +24: [2023-05-25 13:38:04,896] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 198 +26: [2023-05-25 13:38:04,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_15_optim_states.pt. +26: [2023-05-25 13:38:04,898] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 215 +31: [2023-05-25 13:38:04,899] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 251 +27: [2023-05-25 13:38:04,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_14_optim_states.pt. +27: [2023-05-25 13:38:04,900] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 222 +17: [2023-05-25 13:38:04,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_11_optim_states.pt... +17: [2023-05-25 13:38:04,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_11_optim_states.pt... +22: [2023-05-25 13:38:04,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_11_optim_states.pt... +22: [2023-05-25 13:38:04,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_11_optim_states.pt... +26: [2023-05-25 13:38:04,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_13_optim_states.pt. +26: [2023-05-25 13:38:04,904] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 213 +26: [2023-05-25 13:38:04,904] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 214 +26: [2023-05-25 13:38:04,905] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 210 +28: [2023-05-25 13:38:04,905] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 226 + 7: [2023-05-25 13:38:04,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_02_optim_states.pt... + 7: [2023-05-25 13:38:04,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_02_optim_states.pt... +31: [2023-05-25 13:38:04,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_15_optim_states.pt. +31: [2023-05-25 13:38:04,908] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 255 +30: [2023-05-25 13:38:04,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_15_optim_states.pt. +30: [2023-05-25 13:38:04,910] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 243 + 7: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_03_optim_states.pt... + 7: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_03_optim_states.pt... + 2: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_02_optim_states.pt... + 2: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_02_optim_states.pt... +19: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_11_optim_states.pt... +19: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_11_optim_states.pt... +27: [2023-05-25 13:38:04,912] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 222 +26: [2023-05-25 13:38:04,912] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 209 +26: [2023-05-25 13:38:04,915] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 215 + 1: [2023-05-25 13:38:04,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_02_optim_states.pt... + 1: [2023-05-25 13:38:04,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_02_optim_states.pt... +26: [2023-05-25 13:38:04,918] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 213 +28: [2023-05-25 13:38:04,922] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 230 +31: [2023-05-25 13:38:04,922] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 255 +18: [2023-05-25 13:38:04,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_11_optim_states.pt... +18: [2023-05-25 13:38:04,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_11_optim_states.pt... +30: [2023-05-25 13:38:04,924] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 247 +30: [2023-05-25 13:38:04,927] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 243 +20: [2023-05-25 13:38:04,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_11_optim_states.pt... +20: [2023-05-25 13:38:04,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_11_optim_states.pt... +25: [2023-05-25 13:38:04,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_13_optim_states.pt. +25: [2023-05-25 13:38:04,935] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 205 + 9: [2023-05-25 13:38:04,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_04_optim_states.pt. + 9: [2023-05-25 13:38:04,938] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 76 +17: [2023-05-25 13:38:04,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_08_optim_states.pt. +17: [2023-05-25 13:38:04,943] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 136 +23: [2023-05-25 13:38:04,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_11_optim_states.pt... +23: [2023-05-25 13:38:04,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_11_optim_states.pt... +30: [2023-05-25 13:38:04,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_14_optim_states.pt. +30: [2023-05-25 13:38:04,944] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 242 + 8: [2023-05-25 13:38:04,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_04_optim_states.pt. + 8: [2023-05-25 13:38:04,948] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 68 +25: [2023-05-25 13:38:04,948] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 205 +17: [2023-05-25 13:38:04,954] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 136 +30: [2023-05-25 13:38:04,957] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 242 +27: [2023-05-25 13:38:04,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_14_optim_states.pt. +27: [2023-05-25 13:38:04,958] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 218 + 8: [2023-05-25 13:38:04,962] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 68 + 9: [2023-05-25 13:38:04,952] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 76 +27: [2023-05-25 13:38:04,972] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 218 + 4: [2023-05-25 13:38:04,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_02_optim_states.pt... + 4: [2023-05-25 13:38:04,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_02_optim_states.pt... +16: [2023-05-25 13:38:04,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_11_optim_states.pt... +16: [2023-05-25 13:38:04,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_11_optim_states.pt... +10: [2023-05-25 13:38:04,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_04_optim_states.pt. +10: [2023-05-25 13:38:04,975] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 84 +10: [2023-05-25 13:38:04,988] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 84 + 6: [2023-05-25 13:38:04,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_03_optim_states.pt... + 6: [2023-05-25 13:38:04,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_03_optim_states.pt... +15: [2023-05-25 13:38:05,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_04_optim_states.pt. +15: [2023-05-25 13:38:05,019] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 120 +15: [2023-05-25 13:38:05,031] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 120 +17: [2023-05-25 13:38:05,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_08_optim_states.pt. +17: [2023-05-25 13:38:05,074] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 140 + 9: [2023-05-25 13:38:05,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_04_optim_states.pt. + 9: [2023-05-25 13:38:05,067] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 72 + 9: [2023-05-25 13:38:05,080] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 72 +17: [2023-05-25 13:38:05,086] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 140 +11: [2023-05-25 13:38:05,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_04_optim_states.pt. +11: [2023-05-25 13:38:05,119] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 88 +19: [2023-05-25 13:38:05,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_08_optim_states.pt. +19: [2023-05-25 13:38:05,130] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 152 +11: [2023-05-25 13:38:05,131] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 88 +19: [2023-05-25 13:38:05,142] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 152 +22: [2023-05-25 13:38:05,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_08_optim_states.pt. +22: [2023-05-25 13:38:05,146] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 176 +10: [2023-05-25 13:38:05,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_05_optim_states.pt. +10: [2023-05-25 13:38:05,155] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 85 +22: [2023-05-25 13:38:05,160] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 176 + 8: [2023-05-25 13:38:05,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_05_optim_states.pt. + 8: [2023-05-25 13:38:05,175] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 69 + 8: [2023-05-25 13:38:05,187] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 69 + 9: [2023-05-25 13:38:05,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_05_optim_states.pt. +10: [2023-05-25 13:38:05,167] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 85 + 9: [2023-05-25 13:38:05,206] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 77 +10: [2023-05-25 13:38:05,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_05_optim_states.pt. +10: [2023-05-25 13:38:05,171] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 81 +10: [2023-05-25 13:38:05,183] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 81 +13: [2023-05-25 13:38:05,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_07_optim_states.pt. +13: [2023-05-25 13:38:05,235] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 111 +13: [2023-05-25 13:38:05,248] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 111 +12: [2023-05-25 13:38:05,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_07_optim_states.pt. +12: [2023-05-25 13:38:05,256] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 99 +15: [2023-05-25 13:38:05,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_07_optim_states.pt. +15: [2023-05-25 13:38:05,260] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 123 +12: [2023-05-25 13:38:05,268] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 99 +23: [2023-05-25 13:38:05,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_08_optim_states.pt. +23: [2023-05-25 13:38:05,271] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 188 +18: [2023-05-25 13:38:05,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_08_optim_states.pt. +18: [2023-05-25 13:38:05,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_08_optim_states.pt. +18: [2023-05-25 13:38:05,274] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 148 +15: [2023-05-25 13:38:05,274] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 123 +18: [2023-05-25 13:38:05,274] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 144 + 9: [2023-05-25 13:38:05,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_07_optim_states.pt. + 9: [2023-05-25 13:38:05,216] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 79 + 9: [2023-05-25 13:38:05,220] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 77 + 9: [2023-05-25 13:38:05,229] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 79 +14: [2023-05-25 13:38:05,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_04_optim_states.pt. + 8: [2023-05-25 13:38:05,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_07_optim_states.pt. + 8: [2023-05-25 13:38:05,276] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 67 +14: [2023-05-25 13:38:05,276] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 116 +23: [2023-05-25 13:38:05,283] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 188 +18: [2023-05-25 13:38:05,285] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 148 +18: [2023-05-25 13:38:05,286] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 144 +14: [2023-05-25 13:38:05,288] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 116 + 8: [2023-05-25 13:38:05,291] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 67 +11: [2023-05-25 13:38:05,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_07_optim_states.pt. +11: [2023-05-25 13:38:05,293] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 91 +14: [2023-05-25 13:38:05,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_05_optim_states.pt. +14: [2023-05-25 13:38:05,298] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 117 +21: [2023-05-25 13:38:05,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_08_optim_states.pt. +21: [2023-05-25 13:38:05,298] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 172 +15: [2023-05-25 13:38:05,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_07_optim_states.pt. +15: [2023-05-25 13:38:05,303] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 127 +22: [2023-05-25 13:38:05,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_08_optim_states.pt. +22: [2023-05-25 13:38:05,304] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 180 +11: [2023-05-25 13:38:05,307] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 91 +13: [2023-05-25 13:38:05,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_07_optim_states.pt. +13: [2023-05-25 13:38:05,308] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 107 +12: [2023-05-25 13:38:05,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_07_optim_states.pt. +12: [2023-05-25 13:38:05,311] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 103 +21: [2023-05-25 13:38:05,311] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 172 +14: [2023-05-25 13:38:05,311] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 117 +11: [2023-05-25 13:38:05,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_05_optim_states.pt. +11: [2023-05-25 13:38:05,313] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 93 +15: [2023-05-25 13:38:05,315] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 127 +22: [2023-05-25 13:38:05,318] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 180 +19: [2023-05-25 13:38:05,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_08_optim_states.pt. +19: [2023-05-25 13:38:05,319] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 156 +12: [2023-05-25 13:38:05,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_05_optim_states.pt. +12: [2023-05-25 13:38:05,321] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 97 +13: [2023-05-25 13:38:05,321] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 107 +14: [2023-05-25 13:38:05,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_06_optim_states.pt. +12: [2023-05-25 13:38:05,325] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 103 +11: [2023-05-25 13:38:05,327] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 93 +14: [2023-05-25 13:38:05,320] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 118 +10: [2023-05-25 13:38:05,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_07_optim_states.pt. +10: [2023-05-25 13:38:05,330] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 83 +19: [2023-05-25 13:38:05,332] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 156 +14: [2023-05-25 13:38:05,332] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 118 +12: [2023-05-25 13:38:05,333] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 97 + 9: [2023-05-25 13:38:05,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_05_optim_states.pt. + 9: [2023-05-25 13:38:05,335] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 73 + 9: [2023-05-25 13:38:05,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_06_optim_states.pt. + 9: [2023-05-25 13:38:05,335] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 78 +11: [2023-05-25 13:38:05,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_05_optim_states.pt. +11: [2023-05-25 13:38:05,341] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 89 +10: [2023-05-25 13:38:05,342] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 83 +14: [2023-05-25 13:38:05,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_06_optim_states.pt. +14: [2023-05-25 13:38:05,344] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 114 +11: [2023-05-25 13:38:05,354] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 89 +13: [2023-05-25 13:38:05,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_06_optim_states.pt. +13: [2023-05-25 13:38:05,356] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 106 +12: [2023-05-25 13:38:05,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_05_optim_states.pt. +12: [2023-05-25 13:38:05,366] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 101 +13: [2023-05-25 13:38:05,370] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 106 + 8: [2023-05-25 13:38:05,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_05_optim_states.pt. + 8: [2023-05-25 13:38:05,377] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 65 +15: [2023-05-25 13:38:05,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_05_optim_states.pt. +15: [2023-05-25 13:38:05,378] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 125 +12: [2023-05-25 13:38:05,379] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 101 +15: [2023-05-25 13:38:05,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_05_optim_states.pt. +15: [2023-05-25 13:38:05,382] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 121 + 8: [2023-05-25 13:38:05,390] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 65 +15: [2023-05-25 13:38:05,391] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 125 +15: [2023-05-25 13:38:05,396] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 121 +13: [2023-05-25 13:38:05,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_05_optim_states.pt. +13: [2023-05-25 13:38:05,399] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 105 + 9: [2023-05-25 13:38:05,352] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 73 +14: [2023-05-25 13:38:05,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_07_optim_states.pt. + 9: [2023-05-25 13:38:05,352] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 78 +10: [2023-05-25 13:38:05,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_06_optim_states.pt. +14: [2023-05-25 13:38:05,352] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 115 +14: [2023-05-25 13:38:05,357] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 114 +14: [2023-05-25 13:38:05,366] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 115 +10: [2023-05-25 13:38:05,396] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 86 +14: [2023-05-25 13:38:05,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_07_optim_states.pt. +14: [2023-05-25 13:38:05,366] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 119 +14: [2023-05-25 13:38:05,379] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 119 +13: [2023-05-25 13:38:05,412] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 105 +11: [2023-05-25 13:38:05,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_07_optim_states.pt. +11: [2023-05-25 13:38:05,413] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 95 + 8: [2023-05-25 13:38:05,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_07_optim_states.pt. + 8: [2023-05-25 13:38:05,419] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 71 + 4: [2023-05-25 13:38:05,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. + 4: [2023-05-25 13:38:05,420] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 36 +11: [2023-05-25 13:38:05,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_06_optim_states.pt. +11: [2023-05-25 13:38:05,427] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 90 +11: [2023-05-25 13:38:05,429] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 95 +16: [2023-05-25 13:38:05,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_08_optim_states.pt. +16: [2023-05-25 13:38:05,429] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 132 + 4: [2023-05-25 13:38:05,434] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 36 + 8: [2023-05-25 13:38:05,434] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 71 +10: [2023-05-25 13:38:05,409] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 86 +11: [2023-05-25 13:38:05,442] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 90 +16: [2023-05-25 13:38:05,444] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 132 +12: [2023-05-25 13:38:05,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_06_optim_states.pt. +12: [2023-05-25 13:38:05,447] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 98 + 9: [2023-05-25 13:38:05,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_06_optim_states.pt. + 9: [2023-05-25 13:38:05,451] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 74 + 3: [2023-05-25 13:38:05,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. + 3: [2023-05-25 13:38:05,458] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 24 +10: [2023-05-25 13:38:05,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_07_optim_states.pt. +10: [2023-05-25 13:38:05,459] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 87 +12: [2023-05-25 13:38:05,459] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 98 + 9: [2023-05-25 13:38:05,466] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 74 +18: [2023-05-25 13:38:05,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_09_optim_states.pt. +18: [2023-05-25 13:38:05,472] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 149 + 3: [2023-05-25 13:38:05,474] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 24 + 1: [2023-05-25 13:38:05,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. + 1: [2023-05-25 13:38:05,475] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 12 +10: [2023-05-25 13:38:05,474] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 87 +16: [2023-05-25 13:38:05,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_09_optim_states.pt. +16: [2023-05-25 13:38:05,483] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 129 +18: [2023-05-25 13:38:05,485] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 149 + 9: [2023-05-25 13:38:05,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_07_optim_states.pt. + 9: [2023-05-25 13:38:05,487] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 75 + 1: [2023-05-25 13:38:05,491] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 12 +16: [2023-05-25 13:38:05,496] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 129 +20: [2023-05-25 13:38:05,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_09_optim_states.pt. +20: [2023-05-25 13:38:05,499] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 161 + 8: [2023-05-25 13:38:05,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_06_optim_states.pt. + 8: [2023-05-25 13:38:05,502] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 66 + 9: [2023-05-25 13:38:05,500] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 75 +21: [2023-05-25 13:38:05,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_09_optim_states.pt. +21: [2023-05-25 13:38:05,501] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 173 +12: [2023-05-25 13:38:05,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_06_optim_states.pt. +12: [2023-05-25 13:38:05,504] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 102 +16: [2023-05-25 13:38:05,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_09_optim_states.pt. +16: [2023-05-25 13:38:05,506] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 133 +20: [2023-05-25 13:38:05,513] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 161 +12: [2023-05-25 13:38:05,517] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 102 + 8: [2023-05-25 13:38:05,517] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 66 +13: [2023-05-25 13:38:05,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_05_optim_states.pt. +13: [2023-05-25 13:38:05,518] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 109 +16: [2023-05-25 13:38:05,520] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 133 +20: [2023-05-25 13:38:05,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_08_optim_states.pt. +20: [2023-05-25 13:38:05,522] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 160 +16: [2023-05-25 13:38:05,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_08_optim_states.pt. +16: [2023-05-25 13:38:05,526] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 128 +13: [2023-05-25 13:38:05,532] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 109 +20: [2023-05-25 13:38:05,534] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 160 +13: [2023-05-25 13:38:05,536] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_06_optim_states.pt. +13: [2023-05-25 13:38:05,536] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 110 +14: [2023-05-25 13:38:05,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_05_optim_states.pt. +21: [2023-05-25 13:38:05,515] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 173 +21: [2023-05-25 13:38:05,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_08_optim_states.pt. +14: [2023-05-25 13:38:05,528] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 113 +21: [2023-05-25 13:38:05,518] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 168 +21: [2023-05-25 13:38:05,530] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 168 + 0: [2023-05-25 13:38:05,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. + 0: [2023-05-25 13:38:05,538] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 4 +16: [2023-05-25 13:38:05,539] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 128 +15: [2023-05-25 13:38:05,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_06_optim_states.pt. +15: [2023-05-25 13:38:05,542] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 126 +14: [2023-05-25 13:38:05,542] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 113 +13: [2023-05-25 13:38:05,551] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 110 + 0: [2023-05-25 13:38:05,552] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 4 +19: [2023-05-25 13:38:05,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_10_optim_states.pt. +19: [2023-05-25 13:38:05,553] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 154 +15: [2023-05-25 13:38:05,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_06_optim_states.pt. +15: [2023-05-25 13:38:05,555] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 122 +15: [2023-05-25 13:38:05,555] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 126 +19: [2023-05-25 13:38:05,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_09_optim_states.pt. +19: [2023-05-25 13:38:05,559] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 153 +19: [2023-05-25 13:38:05,566] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 154 + 2: [2023-05-25 13:38:05,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. + 2: [2023-05-25 13:38:05,568] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 16 +15: [2023-05-25 13:38:05,568] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 122 +20: [2023-05-25 13:38:05,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_08_optim_states.pt. +20: [2023-05-25 13:38:05,571] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 164 +19: [2023-05-25 13:38:05,572] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 153 +20: [2023-05-25 13:38:05,582] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 164 + 2: [2023-05-25 13:38:05,583] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 16 + 8: [2023-05-25 13:38:05,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_06_optim_states.pt. + 8: [2023-05-25 13:38:05,583] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 70 +20: [2023-05-25 13:38:05,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_10_optim_states.pt. +20: [2023-05-25 13:38:05,589] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 162 + 2: [2023-05-25 13:38:05,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +11: [2023-05-25 13:38:05,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_06_optim_states.pt. + 2: [2023-05-25 13:38:05,595] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 20 +11: [2023-05-25 13:38:05,595] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 94 + 8: [2023-05-25 13:38:05,600] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 70 +20: [2023-05-25 13:38:05,602] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 162 +19: [2023-05-25 13:38:05,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_09_optim_states.pt. +19: [2023-05-25 13:38:05,602] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 157 +10: [2023-05-25 13:38:05,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_06_optim_states.pt. +10: [2023-05-25 13:38:05,607] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 82 + 2: [2023-05-25 13:38:05,608] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 20 +11: [2023-05-25 13:38:05,611] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 94 + 5: [2023-05-25 13:38:05,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. + 5: [2023-05-25 13:38:05,614] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 40 +19: [2023-05-25 13:38:05,615] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 157 + 6: [2023-05-25 13:38:05,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_01_optim_states.pt. + 6: [2023-05-25 13:38:05,619] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 49 +18: [2023-05-25 13:38:05,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_09_optim_states.pt. +18: [2023-05-25 13:38:05,621] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 145 +10: [2023-05-25 13:38:05,623] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 82 + 1: [2023-05-25 13:38:05,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. + 1: [2023-05-25 13:38:05,629] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 8 + 5: [2023-05-25 13:38:05,629] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 40 +22: [2023-05-25 13:38:05,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_11_optim_states.pt. +22: [2023-05-25 13:38:05,631] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 183 +17: [2023-05-25 13:38:05,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_11_optim_states.pt. +17: [2023-05-25 13:38:05,632] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 143 + 5: [2023-05-25 13:38:05,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt. + 5: [2023-05-25 13:38:05,633] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 41 + 6: [2023-05-25 13:38:05,634] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 49 +18: [2023-05-25 13:38:05,634] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 145 + 4: [2023-05-25 13:38:05,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. + 4: [2023-05-25 13:38:05,639] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 32 +23: [2023-05-25 13:38:05,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_10_optim_states.pt. +23: [2023-05-25 13:38:05,641] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 186 +21: [2023-05-25 13:38:05,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_11_optim_states.pt. +21: [2023-05-25 13:38:05,631] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 175 +21: [2023-05-25 13:38:05,643] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 175 +22: [2023-05-25 13:38:05,644] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 183 +17: [2023-05-25 13:38:05,644] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 143 + 1: [2023-05-25 13:38:05,646] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 8 + 6: [2023-05-25 13:38:05,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. + 6: [2023-05-25 13:38:05,648] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 52 + 5: [2023-05-25 13:38:05,650] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 41 +23: [2023-05-25 13:38:05,653] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 186 + 4: [2023-05-25 13:38:05,654] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 32 +20: [2023-05-25 13:38:05,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_10_optim_states.pt. +20: [2023-05-25 13:38:05,656] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 166 + 6: [2023-05-25 13:38:05,662] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 52 +23: [2023-05-25 13:38:05,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_11_optim_states.pt. +23: [2023-05-25 13:38:05,667] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 191 +17: [2023-05-25 13:38:05,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_10_optim_states.pt. +20: [2023-05-25 13:38:05,670] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 166 +17: [2023-05-25 13:38:05,670] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 142 +17: [2023-05-25 13:38:05,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_11_optim_states.pt. +17: [2023-05-25 13:38:05,671] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 139 +16: [2023-05-25 13:38:05,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_11_optim_states.pt. +16: [2023-05-25 13:38:05,676] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 131 +23: [2023-05-25 13:38:05,678] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 191 + 4: [2023-05-25 13:38:05,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_01_optim_states.pt. + 4: [2023-05-25 13:38:05,681] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 33 +17: [2023-05-25 13:38:05,685] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 139 +23: [2023-05-25 13:38:05,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_08_optim_states.pt. +23: [2023-05-25 13:38:05,686] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 184 +17: [2023-05-25 13:38:05,686] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 142 +16: [2023-05-25 13:38:05,689] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 131 +22: [2023-05-25 13:38:05,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_09_optim_states.pt. +22: [2023-05-25 13:38:05,690] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 181 + 4: [2023-05-25 13:38:05,697] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 33 +23: [2023-05-25 13:38:05,698] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 184 +23: [2023-05-25 13:38:05,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_10_optim_states.pt. +23: [2023-05-25 13:38:05,701] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 190 +22: [2023-05-25 13:38:05,703] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 181 +22: [2023-05-25 13:38:05,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_10_optim_states.pt. +22: [2023-05-25 13:38:05,703] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 182 +23: [2023-05-25 13:38:05,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_11_optim_states.pt. +23: [2023-05-25 13:38:05,703] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 187 +16: [2023-05-25 13:38:05,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_10_optim_states.pt. +16: [2023-05-25 13:38:05,704] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 130 +21: [2023-05-25 13:38:05,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_09_optim_states.pt. +21: [2023-05-25 13:38:05,681] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 169 +21: [2023-05-25 13:38:05,695] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 169 +23: [2023-05-25 13:38:05,713] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 190 + 4: [2023-05-25 13:38:05,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_03_optim_states.pt. + 4: [2023-05-25 13:38:05,714] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 35 +23: [2023-05-25 13:38:05,716] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 187 +22: [2023-05-25 13:38:05,717] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 182 +16: [2023-05-25 13:38:05,718] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 130 + 5: [2023-05-25 13:38:05,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_02_optim_states.pt. + 5: [2023-05-25 13:38:05,718] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 42 + 0: [2023-05-25 13:38:05,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. + 0: [2023-05-25 13:38:05,719] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 0 +16: [2023-05-25 13:38:05,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_10_optim_states.pt. +16: [2023-05-25 13:38:05,719] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 134 +18: [2023-05-25 13:38:05,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_11_optim_states.pt. +18: [2023-05-25 13:38:05,720] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 147 + 3: [2023-05-25 13:38:05,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_01_optim_states.pt. + 3: [2023-05-25 13:38:05,721] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 29 +17: [2023-05-25 13:38:05,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_09_optim_states.pt. +17: [2023-05-25 13:38:05,724] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 141 + 5: [2023-05-25 13:38:05,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_02_optim_states.pt. + 5: [2023-05-25 13:38:05,724] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 46 +17: [2023-05-25 13:38:05,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_10_optim_states.pt. +20: [2023-05-25 13:38:05,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_11_optim_states.pt. +17: [2023-05-25 13:38:05,729] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 138 +20: [2023-05-25 13:38:05,729] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 167 + 4: [2023-05-25 13:38:05,730] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 35 +18: [2023-05-25 13:38:05,731] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 147 +16: [2023-05-25 13:38:05,733] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 134 + 0: [2023-05-25 13:38:05,733] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 0 + 7: [2023-05-25 13:38:05,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_01_optim_states.pt. + 7: [2023-05-25 13:38:05,734] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 57 + 5: [2023-05-25 13:38:05,735] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 42 + 3: [2023-05-25 13:38:05,735] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 29 + 6: [2023-05-25 13:38:05,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. + 6: [2023-05-25 13:38:05,736] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 48 +17: [2023-05-25 13:38:05,736] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 141 +18: [2023-05-25 13:38:05,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_10_optim_states.pt. +18: [2023-05-25 13:38:05,736] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 146 +21: [2023-05-25 13:38:05,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_10_optim_states.pt. +21: [2023-05-25 13:38:05,731] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 174 +21: [2023-05-25 13:38:05,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_10_optim_states.pt. +21: [2023-05-25 13:38:05,734] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 170 + 5: [2023-05-25 13:38:05,739] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 46 +17: [2023-05-25 13:38:05,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_09_optim_states.pt. +17: [2023-05-25 13:38:05,739] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 137 +17: [2023-05-25 13:38:05,741] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 138 +20: [2023-05-25 13:38:05,743] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 167 +22: [2023-05-25 13:38:05,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_09_optim_states.pt. +22: [2023-05-25 13:38:05,745] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 177 +21: [2023-05-25 13:38:05,745] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 174 +23: [2023-05-25 13:38:05,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_09_optim_states.pt. +23: [2023-05-25 13:38:05,746] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 185 + 0: could not find arguments in the checkpoint ... + 0: checkpoint version 3.0 +21: [2023-05-25 13:38:05,748] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 170 +18: [2023-05-25 13:38:05,748] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 146 + 6: [2023-05-25 13:38:05,751] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 48 +23: [2023-05-25 13:38:05,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_09_optim_states.pt. +23: [2023-05-25 13:38:05,751] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 189 + 7: [2023-05-25 13:38:05,752] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 57 +17: [2023-05-25 13:38:05,753] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 137 + 6: [2023-05-25 13:38:05,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_02_optim_states.pt. + 6: [2023-05-25 13:38:05,754] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 50 +22: [2023-05-25 13:38:05,759] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 177 +23: [2023-05-25 13:38:05,760] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 185 + 5: [2023-05-25 13:38:05,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt. + 5: [2023-05-25 13:38:05,761] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 45 +23: [2023-05-25 13:38:05,763] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 189 + 3: [2023-05-25 13:38:05,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. + 3: [2023-05-25 13:38:05,764] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 28 + 6: [2023-05-25 13:38:05,770] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 50 + 5: [2023-05-25 13:38:05,776] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 45 + 7: [2023-05-25 13:38:05,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_01_optim_states.pt. + 7: [2023-05-25 13:38:05,776] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 61 + 3: [2023-05-25 13:38:05,778] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 28 +18: [2023-05-25 13:38:05,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_11_optim_states.pt. +18: [2023-05-25 13:38:05,780] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 151 +18: [2023-05-25 13:38:05,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_10_optim_states.pt. +18: [2023-05-25 13:38:05,785] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 150 + 7: [2023-05-25 13:38:05,791] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 61 + 5: [2023-05-25 13:38:05,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. + 5: [2023-05-25 13:38:05,793] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 44 +18: [2023-05-25 13:38:05,794] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 151 +20: [2023-05-25 13:38:05,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_11_optim_states.pt. +20: [2023-05-25 13:38:05,795] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 163 + 6: [2023-05-25 13:38:05,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_03_optim_states.pt. + 6: [2023-05-25 13:38:05,798] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 51 +18: [2023-05-25 13:38:05,800] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 150 +19: [2023-05-25 13:38:05,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_10_optim_states.pt. +19: [2023-05-25 13:38:05,803] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 158 + 2: [2023-05-25 13:38:05,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_01_optim_states.pt. + 2: [2023-05-25 13:38:05,807] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 21 +19: [2023-05-25 13:38:05,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_11_optim_states.pt. +19: [2023-05-25 13:38:05,808] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 159 + 5: [2023-05-25 13:38:05,808] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 44 +20: [2023-05-25 13:38:05,809] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 163 + 2: [2023-05-25 13:38:05,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_03_optim_states.pt. + 2: [2023-05-25 13:38:05,811] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 19 + 6: [2023-05-25 13:38:05,815] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 51 +19: [2023-05-25 13:38:05,817] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 158 +19: [2023-05-25 13:38:05,821] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 159 + 2: [2023-05-25 13:38:05,822] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 21 +20: [2023-05-25 13:38:05,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_09_optim_states.pt. +20: [2023-05-25 13:38:05,826] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 165 + 2: [2023-05-25 13:38:05,827] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 19 +22: [2023-05-25 13:38:05,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_11_optim_states.pt. +22: [2023-05-25 13:38:05,835] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 179 + 7: [2023-05-25 13:38:05,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. + 7: [2023-05-25 13:38:05,836] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 56 + 1: [2023-05-25 13:38:05,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_01_optim_states.pt. +20: [2023-05-25 13:38:05,840] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 165 + 1: [2023-05-25 13:38:05,840] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 9 + 7: [2023-05-25 13:38:05,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_03_optim_states.pt. + 7: [2023-05-25 13:38:05,845] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 59 +22: [2023-05-25 13:38:05,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_10_optim_states.pt. +22: [2023-05-25 13:38:05,846] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 178 +22: [2023-05-25 13:38:05,849] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 179 +21: [2023-05-25 13:38:05,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_11_optim_states.pt. +21: [2023-05-25 13:38:05,828] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 171 +21: [2023-05-25 13:38:05,843] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 171 + 7: [2023-05-25 13:38:05,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_03_optim_states.pt. + 7: [2023-05-25 13:38:05,852] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 63 + 3: [2023-05-25 13:38:05,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_02_optim_states.pt. + 3: [2023-05-25 13:38:05,852] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 30 + 7: [2023-05-25 13:38:05,854] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 56 + 1: [2023-05-25 13:38:05,857] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 9 +22: [2023-05-25 13:38:05,857] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 178 + 7: [2023-05-25 13:38:05,861] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 59 + 2: [2023-05-25 13:38:05,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_01_optim_states.pt. + 2: [2023-05-25 13:38:05,862] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 17 + 0: [2023-05-25 13:38:05,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_01_optim_states.pt. + 0: [2023-05-25 13:38:05,866] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 5 + 3: [2023-05-25 13:38:05,867] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 30 + 7: [2023-05-25 13:38:05,871] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 63 + 3: [2023-05-25 13:38:05,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_01_optim_states.pt. + 3: [2023-05-25 13:38:05,873] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 25 + 2: [2023-05-25 13:38:05,876] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 17 + 0: [2023-05-25 13:38:05,880] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 5 +19: [2023-05-25 13:38:05,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_11_optim_states.pt. +19: [2023-05-25 13:38:05,883] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 155 + 3: [2023-05-25 13:38:05,887] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 25 + 1: [2023-05-25 13:38:05,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_02_optim_states.pt. + 1: [2023-05-25 13:38:05,894] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 14 + 1: [2023-05-25 13:38:05,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_01_optim_states.pt. + 1: [2023-05-25 13:38:05,895] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 13 + 7: [2023-05-25 13:38:05,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. + 7: [2023-05-25 13:38:05,896] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 60 +19: [2023-05-25 13:38:05,897] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 155 + 7: [2023-05-25 13:38:05,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_02_optim_states.pt. + 7: [2023-05-25 13:38:05,900] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 58 + 0: [2023-05-25 13:38:05,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_03_optim_states.pt. + 0: [2023-05-25 13:38:05,905] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 7 + 1: [2023-05-25 13:38:05,909] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 14 + 7: [2023-05-25 13:38:05,912] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 60 + 1: [2023-05-25 13:38:05,913] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 13 + 7: [2023-05-25 13:38:05,916] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 58 + 0: [2023-05-25 13:38:05,919] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 7 + 6: [2023-05-25 13:38:05,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_03_optim_states.pt. + 6: [2023-05-25 13:38:05,925] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 55 + 0: [2023-05-25 13:38:05,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_02_optim_states.pt. + 0: [2023-05-25 13:38:05,931] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 6 + 2: [2023-05-25 13:38:05,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_03_optim_states.pt. + 2: [2023-05-25 13:38:05,938] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 23 + 6: [2023-05-25 13:38:05,942] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 55 + 0: [2023-05-25 13:38:05,946] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 6 + 2: [2023-05-25 13:38:05,952] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 23 + 0: [2023-05-25 13:38:05,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt. + 0: [2023-05-25 13:38:05,952] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 1 + 2: [2023-05-25 13:38:05,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_02_optim_states.pt. + 2: [2023-05-25 13:38:05,957] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 22 + 3: [2023-05-25 13:38:05,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_02_optim_states.pt. + 3: [2023-05-25 13:38:05,963] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 26 + 0: [2023-05-25 13:38:05,968] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 1 + 2: [2023-05-25 13:38:05,973] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 22 + 3: [2023-05-25 13:38:05,979] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 26 + 4: [2023-05-25 13:38:05,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_01_optim_states.pt. + 4: [2023-05-25 13:38:05,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_03_optim_states.pt. + 4: [2023-05-25 13:38:05,986] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 37 + 4: [2023-05-25 13:38:05,986] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 39 + 4: [2023-05-25 13:38:06,000] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 39 + 4: [2023-05-25 13:38:06,000] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 37 + 3: [2023-05-25 13:38:06,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_03_optim_states.pt. + 3: [2023-05-25 13:38:06,016] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 31 + 2: [2023-05-25 13:38:06,022] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_02_optim_states.pt. + 2: [2023-05-25 13:38:06,022] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 18 +16: [2023-05-25 13:38:06,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_11_optim_states.pt. +16: [2023-05-25 13:38:06,025] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 135 + 3: [2023-05-25 13:38:06,031] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 31 + 6: [2023-05-25 13:38:06,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_01_optim_states.pt. + 6: [2023-05-25 13:38:06,034] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 53 + 0: [2023-05-25 13:38:06,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_03_optim_states.pt. + 0: [2023-05-25 13:38:06,035] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 3 +16: [2023-05-25 13:38:06,036] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 135 + 4: [2023-05-25 13:38:06,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_02_optim_states.pt. + 4: [2023-05-25 13:38:06,039] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 38 + 5: [2023-05-25 13:38:06,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_03_optim_states.pt. + 5: [2023-05-25 13:38:06,039] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 43 + 2: [2023-05-25 13:38:06,039] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 18 + 1: [2023-05-25 13:38:06,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_03_optim_states.pt. + 1: [2023-05-25 13:38:06,042] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 15 + 4: [2023-05-25 13:38:06,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_02_optim_states.pt. + 4: [2023-05-25 13:38:06,047] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 34 + 6: [2023-05-25 13:38:06,050] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 53 + 0: [2023-05-25 13:38:06,051] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 3 + 4: [2023-05-25 13:38:06,054] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 38 + 5: [2023-05-25 13:38:06,055] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 43 + 5: [2023-05-25 13:38:06,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_03_optim_states.pt. + 5: [2023-05-25 13:38:06,056] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 47 + 0: [2023-05-25 13:38:06,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_02_optim_states.pt. + 0: [2023-05-25 13:38:06,057] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 2 + 1: [2023-05-25 13:38:06,059] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 15 + 6: [2023-05-25 13:38:06,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_02_optim_states.pt. + 6: [2023-05-25 13:38:06,061] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 54 + 4: [2023-05-25 13:38:06,062] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 34 + 1: [2023-05-25 13:38:06,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_02_optim_states.pt. + 1: [2023-05-25 13:38:06,063] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 10 + 1: [2023-05-25 13:38:06,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_03_optim_states.pt. + 1: [2023-05-25 13:38:06,068] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 11 + 5: [2023-05-25 13:38:06,073] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 47 + 0: [2023-05-25 13:38:06,075] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 2 + 6: [2023-05-25 13:38:06,077] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 54 + 1: [2023-05-25 13:38:06,079] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 10 + 1: [2023-05-25 13:38:06,085] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 11 + 3: [2023-05-25 13:38:06,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_03_optim_states.pt. + 3: [2023-05-25 13:38:06,095] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 27 + 3: [2023-05-25 13:38:06,109] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 27 + 7: [2023-05-25 13:38:06,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_02_optim_states.pt. + 7: [2023-05-25 13:38:06,212] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 62 + 7: [2023-05-25 13:38:06,229] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 62 + 0: successfully loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b at iteration 0 +31: time (ms) | load-checkpoint: 6029.05 + 0: estimated model parameters: 9.828646912 + 0: estimated model parameters without embeddings: 8.863956992 + 0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-05-25 13:38:06 + 0: > building train, validation, and test datasets ... + 0: > datasets target sizes (minimum size): + 0: train: 1 + 0: validation: 51200 + 0: test: 51200 + 0: > building train, validation, and test datasets for GPT ... + 0: > building dataset index ... + 0: reading sizes... + 0: reading pointers... + 0: reading document index... + 0: creating numpy buffer of mmap... + 0: creating memory view of numpy buffer... + 0: > finished creating indexed dataset in 0.015118 seconds + 0: number of documents: 835726 + 0: > dataset split: + 0: train: + 0: document indices in [0, 835726) total of 835726 documents + 0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy + 0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy + 0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy + 0: loaded indexed file in 0.011 seconds + 0: total number of samples: 195101 + 0: total number of epochs: 1 + 0: > building dataset index ... + 0: reading sizes... + 0: reading pointers... + 0: reading document index... + 0: creating numpy buffer of mmap... + 0: creating memory view of numpy buffer... + 0: > finished creating indexed dataset in 0.008158 seconds + 0: number of documents: 364608 + 0: > dataset split: + 0: validation: + 0: document indices in [0, 364608) total of 364608 documents + 0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_51200ns_2048sl_1234s_doc_idx.npy + 0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_51200ns_2048sl_1234s_sample_idx.npy + 0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_51200ns_2048sl_1234s_shuffle_idx.npy + 0: loaded indexed file in 0.011 seconds + 0: total number of samples: 84978 + 0: total number of epochs: 1 + 0: > finished creating GPT datasets ... + 0: [after dataloaders are built] datetime: 2023-05-25 13:38:13 + 0: done with setup ... + 0: training ... +31: time (ms) | model-and-optimizer-setup: 14997.17 | train/valid/test-data-iterators-setup: 3085.96 + 0: [after training is done] datetime: 2023-05-25 13:38:13 +31: ----------------------------------------------------------------------------------------------------------------- +31: validation loss at the end of training for val data | lm loss value: 2.451606E+00 | lm loss PPL: 1.160698E+01 | +31: ----------------------------------------------------------------------------------------------------------------- +END 3583606: Thu 25 May 2023 01:41:36 PM EEST diff --git a/8b7178b13b/latest b/8b7178b13b/latest new file mode 100644 index 0000000000000000000000000000000000000000..7d2b36fef5477c4b300ce92aa705d2451bb96c90 --- /dev/null +++ b/8b7178b13b/latest @@ -0,0 +1 @@ +global_step84877 diff --git a/8b7178b13b/sbatch_8b7178b13b.sh b/8b7178b13b/sbatch_8b7178b13b.sh new file mode 100755 index 0000000000000000000000000000000000000000..e5ba082cd3767ecf369adf4c5f42c5854b996ebf --- /dev/null +++ b/8b7178b13b/sbatch_8b7178b13b.sh @@ -0,0 +1,165 @@ +#!/bin/bash +#SBATCH --exclude=nid007542 +#SBATCH --nodes=64 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=8b7178b13b + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" + +TRAIN_DATA_PATH=train13b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_13B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=2 +TP_SIZE=2 + +MICRO_BATCH_SIZE=2 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_9293M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=5000 + +# Tokens: 178000000000 +# -> Samples: 86914062 +TRAIN_SAMPLES=86_914_062 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 869_140 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/8b7178b13b/sbatch_8b7178b13bfast.sh b/8b7178b13b/sbatch_8b7178b13bfast.sh new file mode 100755 index 0000000000000000000000000000000000000000..175db214ffd7141a56a68b4cad1a6bac52eb7db3 --- /dev/null +++ b/8b7178b13b/sbatch_8b7178b13bfast.sh @@ -0,0 +1,165 @@ +#!/bin/bash +#SBATCH --exclude=nid007542 +#SBATCH --nodes=32 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=8b7178b13bfast + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" + +TRAIN_DATA_PATH=train13b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_13B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=4 +TP_SIZE=4 + +MICRO_BATCH_SIZE=1 +GRADIENT_ACCUMULATION_STEPS=4 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_9293M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=5000 + +# Tokens: 178000000000 +# -> Samples: 86914062 +TRAIN_SAMPLES=86_914_062 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 869_140 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/8b7178b13b/sbatch_8b7178b13bval.sh b/8b7178b13b/sbatch_8b7178b13bval.sh new file mode 100644 index 0000000000000000000000000000000000000000..8468075e4e0f739012d89141f5acec0326a5cab4 --- /dev/null +++ b/8b7178b13b/sbatch_8b7178b13bval.sh @@ -0,0 +1,172 @@ +#!/bin/bash +#SBATCH --exclude=nid007542 +#SBATCH --nodes=32 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=8b7178b13bval +VARIANT_CKPT=lm1-8b7-178b-c4-repetitions/8b7178b13b + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" + +TRAIN_DATA_PATH=train400m.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_12B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + +PP_SIZE=4 +TP_SIZE=4 + +MICRO_BATCH_SIZE=1 +GRADIENT_ACCUMULATION_STEPS=2 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_9293M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=5000 + +# Tokens: 11522010000 +# -> Samples: 5625981 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --override-lr-scheduler \ + --reset-progress \ + --no-load-optim \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + --eval-only true \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + --num-workers 0 \ + --valid-num-workers 0 \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" + diff --git a/8b7178b13b/tensorboard_8b7178b13bval/events.out.tfevents.1685008153.nid006481.68881.0 b/8b7178b13b/tensorboard_8b7178b13bval/events.out.tfevents.1685008153.nid006481.68881.0 new file mode 100644 index 0000000000000000000000000000000000000000..f0efbf748479bcb0f5317f21f7597a48ec90d6a8 --- /dev/null +++ b/8b7178b13b/tensorboard_8b7178b13bval/events.out.tfevents.1685008153.nid006481.68881.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b597ce1c7bf44f6b1e6abb132b4a20eca28b1c7459687da55aac2ecdf4a06193 +size 980 diff --git a/8b7178b13b/tensorboard_8b7178b13bval/events.out.tfevents.1685008747.nid006481.73671.0 b/8b7178b13b/tensorboard_8b7178b13bval/events.out.tfevents.1685008747.nid006481.73671.0 new file mode 100644 index 0000000000000000000000000000000000000000..95d7df80ae6750b607d523cbdf26e712f0e79587 --- /dev/null +++ b/8b7178b13b/tensorboard_8b7178b13bval/events.out.tfevents.1685008747.nid006481.73671.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e28310706c342ee14540da74e2ed0788fb4a8c0177e8b3e87431348ad70cc1e7 +size 980 diff --git a/8b7178b13b/tensorboard_8b7178b13bval/events.out.tfevents.1685011041.nid006582.81892.0 b/8b7178b13b/tensorboard_8b7178b13bval/events.out.tfevents.1685011041.nid006582.81892.0 new file mode 100644 index 0000000000000000000000000000000000000000..877aad933123de0b4609859c287c212ce20c5de2 --- /dev/null +++ b/8b7178b13b/tensorboard_8b7178b13bval/events.out.tfevents.1685011041.nid006582.81892.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b4cb0150da9a66d1bdc510ffecd20f0bf5ebfb9055b64d8900c0d51d818cb2f +size 980 diff --git a/8b7178b25b/tensorboard_8b7178b25bval/events.out.tfevents.1684844937.nid006831.41907.0 b/8b7178b25b/tensorboard_8b7178b25bval/events.out.tfevents.1684844937.nid006831.41907.0 new file mode 100644 index 0000000000000000000000000000000000000000..0d937f21610efc10daddf5eade0203e1437744da --- /dev/null +++ b/8b7178b25b/tensorboard_8b7178b25bval/events.out.tfevents.1684844937.nid006831.41907.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc72841a0583f777855c5f12daf01d73fc680f926e1292248bbece4b38d1d2c9 +size 40 diff --git a/8b7178b25b/tensorboard_8b7178b25bval/events.out.tfevents.1684845162.nid006103.125466.0 b/8b7178b25b/tensorboard_8b7178b25bval/events.out.tfevents.1684845162.nid006103.125466.0 new file mode 100644 index 0000000000000000000000000000000000000000..74fcc84c13c7ed928d08513d03272e6c24650462 --- /dev/null +++ b/8b7178b25b/tensorboard_8b7178b25bval/events.out.tfevents.1684845162.nid006103.125466.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd109158818480397093590bad4c58a37888c2b0fd0a5b4e4ccbd4f83ab3509a +size 40 diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_0.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..2614c6483583b52635e4137146b77f5775f5b2b1 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.4274462846584994, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.026506045585829644}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.0743940764798566, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014865309649260369}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.32181178468311483, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0046779793640138664}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11401591584123742, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020018749467427548}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.035398271042759596, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009285365780968767}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.15703798625688678, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0033071200278070876}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05433244012190525, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012833100172831976}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07120802247039631, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001362906540968755}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.31125592253868395, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004548153090547622}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10946929754745122, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018607174533106234}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07100637180754554, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001398755345483045}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3067417251277733, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004374950039871412}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10881188996025698, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018835358686024326}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_1.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..59b7c7092366fdf426ad0b6037b28ffeef5eb77d --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5086086062549235, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.024148152161397957}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07420670646576108, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0012994585087623457}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3699833502592873, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005221535560959868}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11613651882382699, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017869903586088247}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03528298594442465, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008131219269542471}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.18919504627943293, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003948470106675628}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05556040217298211, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011573526331625286}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07064592165677544, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011911523984872805}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3529817507096153, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00491517289541728}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.11069330166344538, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016501808624972724}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07063890112491973, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012189437083937394}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.35151452415262047, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004837998006757547}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.11048988644232176, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001670545364215763}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_2.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..317b841657e13d62e9a8d9db99e3dc99380517e7 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.6056899396941058, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.031095911436243143}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07171720323593823, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013407577731600893}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3940444073904271, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005453929450001824}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11269434916911006, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016709087363002723}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03342565708524102, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007679332484666479}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.20366528828334413, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.004145286907818467}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05325348987984883, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010824823620329564}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06654587012077771, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012126902994658729}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3626163032278441, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004808677935098044}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10449422734114891, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014912988107590758}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06809528741035778, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012705443708076406}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3717881435944756, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.005017031823311842}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1068591227441311, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015708609058507584}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_3.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..2344ba2166371c9e009cabee888742e18d7d3a0d --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.7043691445234566, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.033436352647492236}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06945197089001147, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011528009544658436}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.40339819978438163, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005565480395978604}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11118267231139893, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016217396109271171}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03230783770657853, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007067527472951313}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.20764571809595184, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.004196897185127135}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05211591902402141, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010331141960554706}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.0633620924773605, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010232470500386361}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3638284378001932, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004793212255287554}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10117012026750102, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014209168284038964}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06592327408520764, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010958929866110073}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.37984454029428794, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.005122585721030877}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1053138951283582, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001529640334300114}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_4.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..a79dd0cac8241296fc9dda50bafa104aa227b127 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.7237940725363642, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05068347700106321}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06842641677261545, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011165170819324747}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.406375539617853, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00550424978526618}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10982478834712758, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015663207694755273}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.031843816337460154, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006802306529242348}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.2101465380791208, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.004148287013449519}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05155868190087433, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009911919774912529}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06196757937456414, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0009925820821838872}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.36309144001642407, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004660726913536042}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09912881209100917, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001366723974954788}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06489771066521006, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010636680077013658}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3825474992320362, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.005083374729929655}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10396329790700495, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014800754096392262}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_5.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..8fffd21c89441bf8cdb2acb8af008b87f9e487b4 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.8235132351061684, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03673652426206423}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06807661053530396, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0010608834381468243}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.41555132879030265, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005712806441867834}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11000005624917578, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001511602784285709}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03190049650831127, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006496066520260159}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.21868118098088812, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.004351358375129378}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05209703492451165, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000966760303297213}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.061096978042372, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0009419140672594094}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3679922650615108, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004831134262904494}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09833778576062344, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013130705666902862}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06407881465692666, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010057526623759078}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.38744747681119496, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00519938044209454}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10329798028758783, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014185087292018597}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_0.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..73b20dc91ee541fbb712bf2dfcd0acf637f83451 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.141621308728508, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001852197455721959}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2487835153814821, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002883968433439267}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.16789045871402775, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001899331826630772}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.028274308481376006, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007638689773747983}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.05312688704536566, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001543560570845198}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.033980155567562176, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008772611553649017}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.10723271379275104, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012832871011880624}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.19569867076577613, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002336256705578366}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.12866222215650755, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013553903517883699}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.1310312538372522, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016949440714634507}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.2313303827432569, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026938189641655444}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.15551623162738257, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017397466278372554}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.607743073760489, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0505448817544192}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_1.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..d2e53b96033a4d1c372c3234937825bc854f90b5 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.20162081804133491, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0023006404028398596}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.3302113914143275, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029253291213495436}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.22971432102282577, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002024992641113895}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.05353641148052063, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011596978277920967}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0901699660110981, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018295391165180175}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.06075703698527234, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011280888351149878}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.14458374147535075, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001635799675433344}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.24465580855285374, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024107233856668618}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.16594057607108714, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014257612958005858}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.18996108622649027, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002170230659407399}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.31222699454801467, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027814459127952855}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.21655317006571786, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001899322620870555}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 3.247891638911873, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08372328781084526}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_2.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..93dd9b8f01ae8f550e0527e3eda3d6582355f7e4 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.22464896032753767, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00267471320719044}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.3334397692148636, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029039999435442228}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.24066179288126918, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020250503611946593}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.06367029131472855, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0013893020454497178}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.09620481258465774, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0019131250126952382}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0672929256474048, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011920824100654986}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.16413395555652943, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0019891543999868556}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.250622872215609, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024435750314427333}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.17657109086645734, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001468075623241685}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.21176073950316202, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0025334766675070507}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.31521561952071925, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027809099107783215}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.2268588669501038, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019039387353264705}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 3.7546658955082104, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05106004200886398}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_3.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..815945d4b4111c883ee8c16edd14e8323e057239 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.19547574181004312, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003050688739798117}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.27599613346113083, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003547296490903856}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1999944588590951, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024431054075354647}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.05556100650606968, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0015316523441232619}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.07900668123150884, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001936538574501469}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.055502076840201975, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012165658097950253}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.14441472671608785, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0023810920879014986}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.20695094754990062, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0028534088606798126}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.14698550688574344, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018099945049349275}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.18528466682755065, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002913994201194578}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.2616826664782059, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0033798208398086076}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.1893838880235665, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023154515872413177}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 3.693352277729935, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09530601369536958}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_4.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b71d54a3e67e2d46dc4453f9ac960eb2521b0d73 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.063039334135716, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00237217321150148}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.09017328403874066, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003175334226857727}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.06295072251821578, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021227903441220584}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.01726942511454547, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001004417819364943}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.026632929529636855, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014520619320699361}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.01740325470887385, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008413142438273321}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.048037538244205064, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018761060154017623}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.06863572497099125, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002484143953625902}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.04720899567270062, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015949102808424699}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.059620692839723696, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0022577542164374776}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.08531399766560527, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00302563434718094}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.05941144209472935, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020085067689640797}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.7011252343751181, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.054776720182795745}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_5.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..8291124fa25f3adc42aeb15352168cf31380cb87 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.011023013045319767, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011239191554005244}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.01576861876664237, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.001514110135289748}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.010948945347352195, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0010303712513734353}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0030642982687765735, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00043761754647970557}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0048436181769985496, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0007069513940476318}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.003081959728364898, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003958924398148921}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.008303485879920013, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.000873077615735369}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.011933114229936235, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0011773390443061134}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.008113230483687816, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.000757965684491531}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.010476549059782741, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010807405840856448}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.015045032229734936, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0014570036926186405}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.010379882751860353, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0009814470061507048}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 6.442668131454499e-07, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.6780002001919048e-06}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_0.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..765727a6f80446f53c635ecc16eec018856d0515 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 1.2617835680167517, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10635778923661202}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.11745138823891885, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015011277238236715}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.22985234642531047, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002121425510390003}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.14934980508808265, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016043096357849695}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.022449491334163164, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007873950105451982}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.041963515173589365, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013675959887181714}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.027899167959252163, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009510765177625647}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.1035383301816468, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010714123610581939}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.21015803872628752, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0017418540261349958}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.13395735123983762, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00119951498857243}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.10678429826736537, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013783155650794233}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.20841519216554769, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0019094622189363957}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.13550871061320946, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001452232590175344}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_1.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..b85ef1c8b896c6fde4f6ee3b83e35d69abeef363 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 8.028465709884875, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10414333335862284}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.33860487694185304, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002059676862058819}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.5051950548626472, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00294388539222501}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.39099089747429966, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019423224150055892}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.14891190026236875, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0014722962919980933}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2273413357531751, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002240835952327236}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.17313651053671014, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015844557666629471}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.24778622071466067, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001542767658539951}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3751611447552265, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024874116332485865}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2875433639233825, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015221333146636332}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.2789653525284207, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001973666047446342}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.4143210047809657, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002765128231116627}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3214385233916606, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001926585809990621}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_2.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..d50e567a17e6cc22bf11c10db0af0e407c3601ce --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 10.118135078197039, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09810052501619766}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.37391944585631076, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019127935246138258}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.5276735152486758, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028083305880520666}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.42448550544175645, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001825472888691242}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.17745705101705952, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0015612598687988076}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.25548983218564925, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0023268496099242975}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.20250984831545202, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016721804731251128}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.2708392675295398, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015293358001324147}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3866756916075708, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024884834616842755}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3085528535031243, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015558243556986532}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.31184277536230626, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018957620411070345}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.43875485442059636, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002710851391374029}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.35353842822798887, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001886743880999753}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_3.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..e029d5bafd94374ccfaec8e39174b779b089da30 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 10.189563951094414, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09427499367940084}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.36817190833860697, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019656678844549645}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.5363504766997446, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027352574830906383}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.42389949583381237, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018605488068051588}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.17677690202821514, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0015500577175042321}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2630023508064778, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0023343418511276868}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.20474913748031123, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016798602044551796}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.26924224328922214, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014968175338731645}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3991179057569062, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002484404009518406}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3118160024380036, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015347599766876738}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.30800886875805217, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019097659829946047}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.4483937574374488, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026777107579305293}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3544946381824597, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019027110628313127}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_4.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..923f7ac044c6aa756fbb538b4cd0f37c97e55bfa --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 10.087905126304989, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12083096625574397}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.363770828222443, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0020036630555845163}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.5387177659214019, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002646963846141895}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4212315759756044, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018415320213085158}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.17597560551882171, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001571481262205388}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2657691623220242, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002333604850349637}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.20471927374375903, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016719159604382972}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.26877802100515913, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015442331987074057}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.4052364760809525, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024774430086498}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3130529200481379, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015386652999892186}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3043335333487051, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019221279858750424}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.45057659617077206, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026276807170486868}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3523313968378458, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001878784755073219}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_5.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..b6fb707f5b388b9665304c81fac98fd3be4f28ad --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 9.819773449865215, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11548211534367327}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.35770393515726967, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0020171246319611895}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.539688571960094, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002611808763994368}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4171512639820159, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018457472467404608}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.1728779412674664, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001552564907957209}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.26604296450461634, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002299997324816336}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.20263412608550646, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016590004976459545}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.2650100444015115, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015203204596296734}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.4087745798701937, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024798181425658206}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3113829743853015, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001519582979021018}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3001431880229394, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019447092940593606}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.4521649916174878, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025893467916896255}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.34979542583413165, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018887263526221952}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_0.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..961b6ede25fe7f0491e25f0823e2624f346b3998 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1640429927941635, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002015041956665472}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.39341008253163384, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004636047629579328}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.22742139033510708, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0026114290632041296}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.043275784885704716, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001336391186147405}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.10866533118072726, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003402923147178015}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.060906753315850805, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018640421740763493}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.12259957209931875, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015710911079803718}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.2958746933623946, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0037695130327974375}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.17023515816181556, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002061842004493983}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.12928277474166358, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017231366356998507}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.31266179796436216, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004192600769757827}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.1796488354953124, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00228884690406039}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 2.660236171533595, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13058979906246349}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_1.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..1d42fa25d841731f84c1d3a577dae929a9a8e0a2 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.13655953349875768, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019656757175551577}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.3351266289452704, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0045506786659058425}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.19173204586716094, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002639692576988235}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.030642922658876727, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001142930640307047}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.07840904303144859, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003007597927500564}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.043509124193048414, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016166226125424621}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10515008122500365, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014878667949867396}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.2603291047174206, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0036304258303670577}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1479712469429257, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002021808544636796}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10864088340556836, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016392672849460208}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.26926885885160073, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004010439316574938}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.15295586712265904, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022389650188625095}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.8314489114971673, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05409360756715158}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_2.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c8886c47b8f56bbd88d5794431f37d06ff7a19ed --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.14713632905683816, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0020215279085607075}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.3625697155906082, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004676136272316295}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.20690402045254036, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0027232531224059726}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.03817870519938818, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0012523560434755418}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.09803539445968558, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003268351761516068}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.05427974421404337, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017696178332732388}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.11527249431509622, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015525999339183435}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.28699007838315493, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003846678173723021}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.16250269839239403, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021208807462068965}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.11807937715144022, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017041758270634855}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2936525618807245, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004161272394841371}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.16643954154187726, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023327546790871127}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 2.3328395887156255, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10816864533274577}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_3.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..720946a1e0da61371aef15c51995217a0ca46800 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.15050477129545162, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0021650717494144725}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.35833402440286477, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004875928275119348}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.20790416673134823, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0027904145464295793}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.03967555502420575, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001257612569171145}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0995195510617509, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0032409089175577817}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.05578255582904122, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017532403099110591}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.11465132801092887, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001666726965584784}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.27592762804713344, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004014306258609167}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1588561646225248, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00218993426182156}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.11933090671402025, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018043535233345025}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.28657490925873824, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004297538545009866}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.1652197759476937, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002376223870136714}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 2.4415835212159744, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09205861798425756}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_4.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..654494ca59039970320de54d76eb99bd2e717249 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.04479833541552641, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0024920338028095244}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.09115873549013587, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0051576903382473185}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.05651233636898341, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0030905378199610973}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.010549200347804146, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008786327894641382}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.024034279053592206, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0020028495235980617}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.014022544687027934, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011421600358457382}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.03426844067651488, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0019596209525071014}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.06925411151199752, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003988959935482703}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.04286140894694921, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002370447391151435}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.036540645050964785, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0020678075006755215}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.07413354565538481, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004252794848785553}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.045863687869416005, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0025272572767658435}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.0886930800027617, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13685163756146332}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_5.json b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..7c108fccbf195cc0c1d7dfc5bb20227ecf9ad1eb --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/agg.8b7178b25bopt_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0027738518451789936, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0007772852422880741}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.004449774503151586, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0013000230402188415}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0030531767218957887, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0008235812485186412}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0004318387593770127, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0002096018140886139}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0006493854199619724, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0002769463291779765}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.000461932013569787, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00019747062019636368}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0018680723098669894, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0005358076043197855}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0029818697516115103, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0008929801630497813}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.00206141715660179, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005827251790517291}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.002156304889205613, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0006383957765946712}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.003337441465976244, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0009699711366361862}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.002323102977881885, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0006354398848530671}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 2.170644306962683e-19, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 5.499005165072242e-15}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 8, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_0.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..23774900cf809c28f1ce45515e28514f6ac0e40f --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56297267d239aa3ae85dc064841b9cca5245e3de73573c9ae0960457eb9791a5 +size 4118306 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_1.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..778f53104f491eeb4568c87d19a1e81820d11ce0 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a5cc85ea64b71c5d231ad74de0f4845a348ee78c2d8e8fb1d7785f664a496f +size 5086123 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_2.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ddc070cc38c324e642b55bca44733f4ae8511cea --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8314926e52648b082ba73d976fd1452e86ef5cf86bc265ce345514f782edec9c +size 6068364 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_3.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5c10d9ea1c2c46af52380db9b1c35bfc305fe4dd --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81acbde5d9aab82313450f9e9ea922a07ae1d69be7335f358fca8c2022642cc9 +size 7009353 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_4.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..81d02ad3684f0810d547ae12c69f3c5c6745187f --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e585664128a62caf515217bed81a082c1c531711317cfe73f637998cfae6a435 +size 7928900 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_5.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..77c74e76617358a00a4a9af8523cdad5d8691f5c --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a9e7306234e7e2f02c0b3bc0d7608e9adcb780980f15c155336656c1bc1fe99 +size 8833847 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_0.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bb522a73d6e7a5027c6e48f855d66f00c51a7530 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e18dd76311b0935a7ba672787f6956531f3b2dcded6f1ab4d27cc3c25ab2d9e9 +size 7667394 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_1.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..814e06948bea7eb6099bac7e5a20a06a7e03cc46 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b75386aa311a8e4be57fcbc118a00232eaa1cd97175d38dbf0f586a3cd95b00 +size 13300073 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_2.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e782f438d9fd73d154b44d9ac8afec36ca56e8fb --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338a66c62b4f06b1ce998e885a68fb128041512fb2202e1d262989a6314e7c21 +size 18870569 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_3.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..06fba677992350f0e849e734178420c36fe6d17d --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d62b4accabd2089e91dae63aa312c8403b62a861aab9149a0bf049924fa410 +size 24295083 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_4.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6fe8caa81251c278d566bf6656cf0753a19c8638 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a3047a2d5d29135e6cc65a86fb69549f7721343764f1fbb6afc2533ca81ca1 +size 29456332 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_5.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..dc20a4aecbd86c2cff7e6951bbb142e2538b56d5 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d559f0e51104fe34348ef9430a55951d257c7274fe476af18dfcaa5fd727ffa +size 34798468 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7338cc2ff17373f5788ff056e2b8b6d20ad1c3c4 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a77a997fdebc9f76be3b8c9828dcf7ad3e8a297aeef2ef4bc6f56b71a5e3f9da +size 4459245 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..301e9cbcd798b3d1fc02a84c7c9141cf5f95c50e --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394a4bc43e0737726bfb023c5f9dafcb3705350cff335f425d830104354af7b3 +size 5362308 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..87e9dc798a09b49f7fcfca4035a2b77eeb48e10d --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7a3f1decac1a61d7e5f46e1f71cb674ce4756de70453b75a5b34c4454a044c +size 6400562 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b3567222f1468c970fb9c6ff8002b24a8d9876b7 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de12f63bd682112de02d9e8b08c9294185ec30a3f78341999e2d835058ea3d0a +size 7505391 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..237a2740bd98c1c5440222e0375b2625ec9d32d3 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83d4b48fc324f90e0155558b1b628d2e36b9f8c8dc8078f814a0d89d244abeae +size 8597394 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ccc284805e4d1c09aaa28b33f54601dfa4ca41e7 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c04ae1fa0fdd148f2ce8b19ce5c2e8821cb3f6b70f9cc62e1b1863d8e9f5cd22 +size 9698801 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_0.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bbb56ee1cc7bf1f17434030ca11df22a85c9b68a --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a996aaed46bbefbf4504e987b46a4abd1c088c4ae1b7307c9bf6154361c3de0c +size 2836535 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_1.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bc8b2892b5c81c914de43dcd6e11bae322d18e37 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0420857c7c65e4c7e878d72477fea1c17741e3098ca77de3e505e1ec07cdfe68 +size 5103416 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_2.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7f8f9cf8dcd363e7c6540446a92b522ca64e007c --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc3e0a9fe038cc9fed697ff964e9854e112945b8b13c11203dded37291ec00cd +size 7381000 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_3.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2d81e977e8d89cea516eb0356f4c8004f0a6101d --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb11cdb3e26b29d82022c9776a92757ffe6237b2f459c5ede4e366c9a987202 +size 9649042 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_4.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3aebc3d70ad493c3798dec3c2a41de71c2cb07ab --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02058b7691dbd30c08ca2eb01763f261ab6478d98c85b73633a218401ab7270f +size 11673930 diff --git a/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_5.jsonl b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2dc9246e4720091806ca3b4cbf11c866a2250e3d --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/examples.8b7178b25bopt_gem_xsum_article_DOC_summary_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7580f05302af06eff7bd339ddcb78783f10f5b9e878d7b2a256944eb5a131b59 +size 13899083 diff --git a/8b7178b25bopt/evaluation/generation/merged.csv b/8b7178b25bopt/evaluation/generation/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..2790631fff27a0c22c6e68a36afbac6499cc4041 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/merged.csv @@ -0,0 +1,53 @@ +dataset,fewshots,prompt,metric,value +e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.027899167959252163 +e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.027899167959252163 +e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.17313651053671014 +e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.17313651053671014 +e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.20250984831545202 +e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.20250984831545202 +e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.20474913748031123 +e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.20474913748031123 +e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.20471927374375903 +e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.20471927374375903 +e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.20263412608550646 +e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.20263412608550646 +e2e_nlg_cleaned,5,average,multiple,0.16927467735349852 +gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.060906753315850805 +gem_xsum,0,median,rouge2_fmeasure,0.060906753315850805 +gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.043509124193048414 +gem_xsum,1,median,rouge2_fmeasure,0.043509124193048414 +gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.05427974421404337 +gem_xsum,2,median,rouge2_fmeasure,0.05427974421404337 +gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.05578255582904122 +gem_xsum,3,median,rouge2_fmeasure,0.05578255582904122 +gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.014022544687027934 +gem_xsum,4,median,rouge2_fmeasure,0.014022544687027934 +gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.000461932013569787 +gem_xsum,5,median,rouge2_fmeasure,0.000461932013569787 +gem_xsum,5,average,multiple,0.03816044237543025 +web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.05433244012190525 +web_nlg_en,0,median,rouge2_fmeasure,0.05433244012190525 +web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.05556040217298211 +web_nlg_en,1,median,rouge2_fmeasure,0.05556040217298211 +web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.05325348987984883 +web_nlg_en,2,median,rouge2_fmeasure,0.05325348987984883 +web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.05211591902402141 +web_nlg_en,3,median,rouge2_fmeasure,0.05211591902402141 +web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.05155868190087433 +web_nlg_en,4,median,rouge2_fmeasure,0.05155868190087433 +web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.05209703492451165 +web_nlg_en,5,median,rouge2_fmeasure,0.05209703492451165 +web_nlg_en,5,average,multiple,0.053152994670690595 +wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.033980155567562176 +wiki_lingua_en,0,median,rouge2_fmeasure,0.033980155567562176 +wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.06075703698527234 +wiki_lingua_en,1,median,rouge2_fmeasure,0.06075703698527234 +wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.0672929256474048 +wiki_lingua_en,2,median,rouge2_fmeasure,0.0672929256474048 +wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.055502076840201975 +wiki_lingua_en,3,median,rouge2_fmeasure,0.055502076840201975 +wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.01740325470887385 +wiki_lingua_en,4,median,rouge2_fmeasure,0.01740325470887385 +wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.003081959728364898 +wiki_lingua_en,5,median,rouge2_fmeasure,0.003081959728364898 +wiki_lingua_en,5,average,multiple,0.039669568246280006 diff --git a/8b7178b25bopt/evaluation/generation/merged.json b/8b7178b25bopt/evaluation/generation/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..f6e5ec8139973536804186cb95ca162730d7e03f --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/merged.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.4274462846584994, "bleu_stderr": 0.026506045585829644, "rouge1_fmeasure": 0.11401591584123742, "rouge1_fmeasure_stderr": 0.0020018749467427548, "rouge1_precision": 0.0743940764798566, "rouge1_precision_stderr": 0.0014865309649260369, "rouge1_recall": 0.32181178468311483, "rouge1_recall_stderr": 0.0046779793640138664, "rouge2_fmeasure": 0.05433244012190525, "rouge2_fmeasure_stderr": 0.0012833100172831976, "rouge2_precision": 0.035398271042759596, "rouge2_precision_stderr": 0.0009285365780968767, "rouge2_recall": 0.15703798625688678, "rouge2_recall_stderr": 0.0033071200278070876, "rougeL_fmeasure": 0.10946929754745122, "rougeL_fmeasure_stderr": 0.0018607174533106234, "rougeL_precision": 0.07120802247039631, "rougeL_precision_stderr": 0.001362906540968755, "rougeL_recall": 0.31125592253868395, "rougeL_recall_stderr": 0.004548153090547622, "rougeLsum_fmeasure": 0.10881188996025698, "rougeLsum_fmeasure_stderr": 0.0018835358686024326, "rougeLsum_precision": 0.07100637180754554, "rougeLsum_precision_stderr": 0.001398755345483045, "rougeLsum_recall": 0.3067417251277733, "rougeLsum_recall_stderr": 0.004374950039871412}}, "1": {"PALM_prompt": {"bleu": 0.5086086062549235, "bleu_stderr": 0.024148152161397957, "rouge1_fmeasure": 0.11613651882382699, "rouge1_fmeasure_stderr": 0.0017869903586088247, "rouge1_precision": 0.07420670646576108, "rouge1_precision_stderr": 0.0012994585087623457, "rouge1_recall": 0.3699833502592873, "rouge1_recall_stderr": 0.005221535560959868, "rouge2_fmeasure": 0.05556040217298211, "rouge2_fmeasure_stderr": 0.0011573526331625286, "rouge2_precision": 0.03528298594442465, "rouge2_precision_stderr": 0.0008131219269542471, "rouge2_recall": 0.18919504627943293, "rouge2_recall_stderr": 0.003948470106675628, "rougeL_fmeasure": 0.11069330166344538, "rougeL_fmeasure_stderr": 0.0016501808624972724, "rougeL_precision": 0.07064592165677544, "rougeL_precision_stderr": 0.0011911523984872805, "rougeL_recall": 0.3529817507096153, "rougeL_recall_stderr": 0.00491517289541728, "rougeLsum_fmeasure": 0.11048988644232176, "rougeLsum_fmeasure_stderr": 0.001670545364215763, "rougeLsum_precision": 0.07063890112491973, "rougeLsum_precision_stderr": 0.0012189437083937394, "rougeLsum_recall": 0.35151452415262047, "rougeLsum_recall_stderr": 0.004837998006757547}}, "2": {"PALM_prompt": {"bleu": 0.6056899396941058, "bleu_stderr": 0.031095911436243143, "rouge1_fmeasure": 0.11269434916911006, "rouge1_fmeasure_stderr": 0.0016709087363002723, "rouge1_precision": 0.07171720323593823, "rouge1_precision_stderr": 0.0013407577731600893, "rouge1_recall": 0.3940444073904271, "rouge1_recall_stderr": 0.005453929450001824, "rouge2_fmeasure": 0.05325348987984883, "rouge2_fmeasure_stderr": 0.0010824823620329564, "rouge2_precision": 0.03342565708524102, "rouge2_precision_stderr": 0.0007679332484666479, "rouge2_recall": 0.20366528828334413, "rouge2_recall_stderr": 0.004145286907818467, "rougeL_fmeasure": 0.10449422734114891, "rougeL_fmeasure_stderr": 0.0014912988107590758, "rougeL_precision": 0.06654587012077771, "rougeL_precision_stderr": 0.0012126902994658729, "rougeL_recall": 0.3626163032278441, "rougeL_recall_stderr": 0.004808677935098044, "rougeLsum_fmeasure": 0.1068591227441311, "rougeLsum_fmeasure_stderr": 0.0015708609058507584, "rougeLsum_precision": 0.06809528741035778, "rougeLsum_precision_stderr": 0.0012705443708076406, "rougeLsum_recall": 0.3717881435944756, "rougeLsum_recall_stderr": 0.005017031823311842}}, "3": {"PALM_prompt": {"bleu": 0.7043691445234566, "bleu_stderr": 0.033436352647492236, "rouge1_fmeasure": 0.11118267231139893, "rouge1_fmeasure_stderr": 0.0016217396109271171, "rouge1_precision": 0.06945197089001147, "rouge1_precision_stderr": 0.0011528009544658436, "rouge1_recall": 0.40339819978438163, "rouge1_recall_stderr": 0.005565480395978604, "rouge2_fmeasure": 0.05211591902402141, "rouge2_fmeasure_stderr": 0.0010331141960554706, "rouge2_precision": 0.03230783770657853, "rouge2_precision_stderr": 0.0007067527472951313, "rouge2_recall": 0.20764571809595184, "rouge2_recall_stderr": 0.004196897185127135, "rougeL_fmeasure": 0.10117012026750102, "rougeL_fmeasure_stderr": 0.0014209168284038964, "rougeL_precision": 0.0633620924773605, "rougeL_precision_stderr": 0.0010232470500386361, "rougeL_recall": 0.3638284378001932, "rougeL_recall_stderr": 0.004793212255287554, "rougeLsum_fmeasure": 0.1053138951283582, "rougeLsum_fmeasure_stderr": 0.001529640334300114, "rougeLsum_precision": 0.06592327408520764, "rougeLsum_precision_stderr": 0.0010958929866110073, "rougeLsum_recall": 0.37984454029428794, "rougeLsum_recall_stderr": 0.005122585721030877}}, "4": {"PALM_prompt": {"bleu": 0.7237940725363642, "bleu_stderr": 0.05068347700106321, "rouge1_fmeasure": 0.10982478834712758, "rouge1_fmeasure_stderr": 0.0015663207694755273, "rouge1_precision": 0.06842641677261545, "rouge1_precision_stderr": 0.0011165170819324747, "rouge1_recall": 0.406375539617853, "rouge1_recall_stderr": 0.00550424978526618, "rouge2_fmeasure": 0.05155868190087433, "rouge2_fmeasure_stderr": 0.0009911919774912529, "rouge2_precision": 0.031843816337460154, "rouge2_precision_stderr": 0.0006802306529242348, "rouge2_recall": 0.2101465380791208, "rouge2_recall_stderr": 0.004148287013449519, "rougeL_fmeasure": 0.09912881209100917, "rougeL_fmeasure_stderr": 0.001366723974954788, "rougeL_precision": 0.06196757937456414, "rougeL_precision_stderr": 0.0009925820821838872, "rougeL_recall": 0.36309144001642407, "rougeL_recall_stderr": 0.004660726913536042, "rougeLsum_fmeasure": 0.10396329790700495, "rougeLsum_fmeasure_stderr": 0.0014800754096392262, "rougeLsum_precision": 0.06489771066521006, "rougeLsum_precision_stderr": 0.0010636680077013658, "rougeLsum_recall": 0.3825474992320362, "rougeLsum_recall_stderr": 0.005083374729929655}}, "5": {"PALM_prompt": {"bleu": 0.8235132351061684, "bleu_stderr": 0.03673652426206423, "rouge1_fmeasure": 0.11000005624917578, "rouge1_fmeasure_stderr": 0.001511602784285709, "rouge1_precision": 0.06807661053530396, "rouge1_precision_stderr": 0.0010608834381468243, "rouge1_recall": 0.41555132879030265, "rouge1_recall_stderr": 0.005712806441867834, "rouge2_fmeasure": 0.05209703492451165, "rouge2_fmeasure_stderr": 0.000966760303297213, "rouge2_precision": 0.03190049650831127, "rouge2_precision_stderr": 0.0006496066520260159, "rouge2_recall": 0.21868118098088812, "rouge2_recall_stderr": 0.004351358375129378, "rougeL_fmeasure": 0.09833778576062344, "rougeL_fmeasure_stderr": 0.0013130705666902862, "rougeL_precision": 0.061096978042372, "rougeL_precision_stderr": 0.0009419140672594094, "rougeL_recall": 0.3679922650615108, "rougeL_recall_stderr": 0.004831134262904494, "rougeLsum_fmeasure": 0.10329798028758783, "rougeLsum_fmeasure_stderr": 0.0014185087292018597, "rougeLsum_precision": 0.06407881465692666, "rougeLsum_precision_stderr": 0.0010057526623759078, "rougeLsum_recall": 0.38744747681119496, "rougeLsum_recall_stderr": 0.00519938044209454}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.607743073760489, "bleu_stderr": 0.0505448817544192, "rouge1_fmeasure": 0.16789045871402775, "rouge1_fmeasure_stderr": 0.001899331826630772, "rouge1_precision": 0.141621308728508, "rouge1_precision_stderr": 0.001852197455721959, "rouge1_recall": 0.2487835153814821, "rouge1_recall_stderr": 0.002883968433439267, "rouge2_fmeasure": 0.033980155567562176, "rouge2_fmeasure_stderr": 0.0008772611553649017, "rouge2_precision": 0.028274308481376006, "rouge2_precision_stderr": 0.0007638689773747983, "rouge2_recall": 0.05312688704536566, "rouge2_recall_stderr": 0.001543560570845198, "rougeL_fmeasure": 0.12866222215650755, "rougeL_fmeasure_stderr": 0.0013553903517883699, "rougeL_precision": 0.10723271379275104, "rougeL_precision_stderr": 0.0012832871011880624, "rougeL_recall": 0.19569867076577613, "rougeL_recall_stderr": 0.002336256705578366, "rougeLsum_fmeasure": 0.15551623162738257, "rougeLsum_fmeasure_stderr": 0.0017397466278372554, "rougeLsum_precision": 0.1310312538372522, "rougeLsum_precision_stderr": 0.0016949440714634507, "rougeLsum_recall": 0.2313303827432569, "rougeLsum_recall_stderr": 0.0026938189641655444}}, "1": {"tldr_en": {"bleu": 3.247891638911873, "bleu_stderr": 0.08372328781084526, "rouge1_fmeasure": 0.22971432102282577, "rouge1_fmeasure_stderr": 0.002024992641113895, "rouge1_precision": 0.20162081804133491, "rouge1_precision_stderr": 0.0023006404028398596, "rouge1_recall": 0.3302113914143275, "rouge1_recall_stderr": 0.0029253291213495436, "rouge2_fmeasure": 0.06075703698527234, "rouge2_fmeasure_stderr": 0.0011280888351149878, "rouge2_precision": 0.05353641148052063, "rouge2_precision_stderr": 0.0011596978277920967, "rouge2_recall": 0.0901699660110981, "rouge2_recall_stderr": 0.0018295391165180175, "rougeL_fmeasure": 0.16594057607108714, "rougeL_fmeasure_stderr": 0.0014257612958005858, "rougeL_precision": 0.14458374147535075, "rougeL_precision_stderr": 0.001635799675433344, "rougeL_recall": 0.24465580855285374, "rougeL_recall_stderr": 0.0024107233856668618, "rougeLsum_fmeasure": 0.21655317006571786, "rougeLsum_fmeasure_stderr": 0.001899322620870555, "rougeLsum_precision": 0.18996108622649027, "rougeLsum_precision_stderr": 0.002170230659407399, "rougeLsum_recall": 0.31222699454801467, "rougeLsum_recall_stderr": 0.0027814459127952855}}, "2": {"tldr_en": {"bleu": 3.7546658955082104, "bleu_stderr": 0.05106004200886398, "rouge1_fmeasure": 0.24066179288126918, "rouge1_fmeasure_stderr": 0.0020250503611946593, "rouge1_precision": 0.22464896032753767, "rouge1_precision_stderr": 0.00267471320719044, "rouge1_recall": 0.3334397692148636, "rouge1_recall_stderr": 0.0029039999435442228, "rouge2_fmeasure": 0.0672929256474048, "rouge2_fmeasure_stderr": 0.0011920824100654986, "rouge2_precision": 0.06367029131472855, "rouge2_precision_stderr": 0.0013893020454497178, "rouge2_recall": 0.09620481258465774, "rouge2_recall_stderr": 0.0019131250126952382, "rougeL_fmeasure": 0.17657109086645734, "rougeL_fmeasure_stderr": 0.001468075623241685, "rougeL_precision": 0.16413395555652943, "rougeL_precision_stderr": 0.0019891543999868556, "rougeL_recall": 0.250622872215609, "rougeL_recall_stderr": 0.0024435750314427333, "rougeLsum_fmeasure": 0.2268588669501038, "rougeLsum_fmeasure_stderr": 0.0019039387353264705, "rougeLsum_precision": 0.21176073950316202, "rougeLsum_precision_stderr": 0.0025334766675070507, "rougeLsum_recall": 0.31521561952071925, "rougeLsum_recall_stderr": 0.0027809099107783215}}, "3": {"tldr_en": {"bleu": 3.693352277729935, "bleu_stderr": 0.09530601369536958, "rouge1_fmeasure": 0.1999944588590951, "rouge1_fmeasure_stderr": 0.0024431054075354647, "rouge1_precision": 0.19547574181004312, "rouge1_precision_stderr": 0.003050688739798117, "rouge1_recall": 0.27599613346113083, "rouge1_recall_stderr": 0.003547296490903856, "rouge2_fmeasure": 0.055502076840201975, "rouge2_fmeasure_stderr": 0.0012165658097950253, "rouge2_precision": 0.05556100650606968, "rouge2_precision_stderr": 0.0015316523441232619, "rouge2_recall": 0.07900668123150884, "rouge2_recall_stderr": 0.001936538574501469, "rougeL_fmeasure": 0.14698550688574344, "rougeL_fmeasure_stderr": 0.0018099945049349275, "rougeL_precision": 0.14441472671608785, "rougeL_precision_stderr": 0.0023810920879014986, "rougeL_recall": 0.20695094754990062, "rougeL_recall_stderr": 0.0028534088606798126, "rougeLsum_fmeasure": 0.1893838880235665, "rougeLsum_fmeasure_stderr": 0.0023154515872413177, "rougeLsum_precision": 0.18528466682755065, "rougeLsum_precision_stderr": 0.002913994201194578, "rougeLsum_recall": 0.2616826664782059, "rougeLsum_recall_stderr": 0.0033798208398086076}}, "4": {"tldr_en": {"bleu": 0.7011252343751181, "bleu_stderr": 0.054776720182795745, "rouge1_fmeasure": 0.06295072251821578, "rouge1_fmeasure_stderr": 0.0021227903441220584, "rouge1_precision": 0.063039334135716, "rouge1_precision_stderr": 0.00237217321150148, "rouge1_recall": 0.09017328403874066, "rouge1_recall_stderr": 0.003175334226857727, "rouge2_fmeasure": 0.01740325470887385, "rouge2_fmeasure_stderr": 0.0008413142438273321, "rouge2_precision": 0.01726942511454547, "rouge2_precision_stderr": 0.001004417819364943, "rouge2_recall": 0.026632929529636855, "rouge2_recall_stderr": 0.0014520619320699361, "rougeL_fmeasure": 0.04720899567270062, "rougeL_fmeasure_stderr": 0.0015949102808424699, "rougeL_precision": 0.048037538244205064, "rougeL_precision_stderr": 0.0018761060154017623, "rougeL_recall": 0.06863572497099125, "rougeL_recall_stderr": 0.002484143953625902, "rougeLsum_fmeasure": 0.05941144209472935, "rougeLsum_fmeasure_stderr": 0.0020085067689640797, "rougeLsum_precision": 0.059620692839723696, "rougeLsum_precision_stderr": 0.0022577542164374776, "rougeLsum_recall": 0.08531399766560527, "rougeLsum_recall_stderr": 0.00302563434718094}}, "5": {"tldr_en": {"bleu": 6.442668131454499e-07, "bleu_stderr": 1.6780002001919048e-06, "rouge1_fmeasure": 0.010948945347352195, "rouge1_fmeasure_stderr": 0.0010303712513734353, "rouge1_precision": 0.011023013045319767, "rouge1_precision_stderr": 0.0011239191554005244, "rouge1_recall": 0.01576861876664237, "rouge1_recall_stderr": 0.001514110135289748, "rouge2_fmeasure": 0.003081959728364898, "rouge2_fmeasure_stderr": 0.0003958924398148921, "rouge2_precision": 0.0030642982687765735, "rouge2_precision_stderr": 0.00043761754647970557, "rouge2_recall": 0.0048436181769985496, "rouge2_recall_stderr": 0.0007069513940476318, "rougeL_fmeasure": 0.008113230483687816, "rougeL_fmeasure_stderr": 0.000757965684491531, "rougeL_precision": 0.008303485879920013, "rougeL_precision_stderr": 0.000873077615735369, "rougeL_recall": 0.011933114229936235, "rougeL_recall_stderr": 0.0011773390443061134, "rougeLsum_fmeasure": 0.010379882751860353, "rougeLsum_fmeasure_stderr": 0.0009814470061507048, "rougeLsum_precision": 0.010476549059782741, "rougeLsum_precision_stderr": 0.0010807405840856448, "rougeLsum_recall": 0.015045032229734936, "rougeLsum_recall_stderr": 0.0014570036926186405}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 1.2617835680167517, "bleu_stderr": 0.10635778923661202, "rouge1_fmeasure": 0.14934980508808265, "rouge1_fmeasure_stderr": 0.0016043096357849695, "rouge1_precision": 0.11745138823891885, "rouge1_precision_stderr": 0.0015011277238236715, "rouge1_recall": 0.22985234642531047, "rouge1_recall_stderr": 0.002121425510390003, "rouge2_fmeasure": 0.027899167959252163, "rouge2_fmeasure_stderr": 0.0009510765177625647, "rouge2_precision": 0.022449491334163164, "rouge2_precision_stderr": 0.0007873950105451982, "rouge2_recall": 0.041963515173589365, "rouge2_recall_stderr": 0.0013675959887181714, "rougeL_fmeasure": 0.13395735123983762, "rougeL_fmeasure_stderr": 0.00119951498857243, "rougeL_precision": 0.1035383301816468, "rougeL_precision_stderr": 0.0010714123610581939, "rougeL_recall": 0.21015803872628752, "rougeL_recall_stderr": 0.0017418540261349958, "rougeLsum_fmeasure": 0.13550871061320946, "rougeLsum_fmeasure_stderr": 0.001452232590175344, "rougeLsum_precision": 0.10678429826736537, "rougeLsum_precision_stderr": 0.0013783155650794233, "rougeLsum_recall": 0.20841519216554769, "rougeLsum_recall_stderr": 0.0019094622189363957}}, "1": {"generate_text_restaurant": {"bleu": 8.028465709884875, "bleu_stderr": 0.10414333335862284, "rouge1_fmeasure": 0.39099089747429966, "rouge1_fmeasure_stderr": 0.0019423224150055892, "rouge1_precision": 0.33860487694185304, "rouge1_precision_stderr": 0.002059676862058819, "rouge1_recall": 0.5051950548626472, "rouge1_recall_stderr": 0.00294388539222501, "rouge2_fmeasure": 0.17313651053671014, "rouge2_fmeasure_stderr": 0.0015844557666629471, "rouge2_precision": 0.14891190026236875, "rouge2_precision_stderr": 0.0014722962919980933, "rouge2_recall": 0.2273413357531751, "rouge2_recall_stderr": 0.002240835952327236, "rougeL_fmeasure": 0.2875433639233825, "rougeL_fmeasure_stderr": 0.0015221333146636332, "rougeL_precision": 0.24778622071466067, "rougeL_precision_stderr": 0.001542767658539951, "rougeL_recall": 0.3751611447552265, "rougeL_recall_stderr": 0.0024874116332485865, "rougeLsum_fmeasure": 0.3214385233916606, "rougeLsum_fmeasure_stderr": 0.001926585809990621, "rougeLsum_precision": 0.2789653525284207, "rougeLsum_precision_stderr": 0.001973666047446342, "rougeLsum_recall": 0.4143210047809657, "rougeLsum_recall_stderr": 0.002765128231116627}}, "2": {"generate_text_restaurant": {"bleu": 10.118135078197039, "bleu_stderr": 0.09810052501619766, "rouge1_fmeasure": 0.42448550544175645, "rouge1_fmeasure_stderr": 0.001825472888691242, "rouge1_precision": 0.37391944585631076, "rouge1_precision_stderr": 0.0019127935246138258, "rouge1_recall": 0.5276735152486758, "rouge1_recall_stderr": 0.0028083305880520666, "rouge2_fmeasure": 0.20250984831545202, "rouge2_fmeasure_stderr": 0.0016721804731251128, "rouge2_precision": 0.17745705101705952, "rouge2_precision_stderr": 0.0015612598687988076, "rouge2_recall": 0.25548983218564925, "rouge2_recall_stderr": 0.0023268496099242975, "rougeL_fmeasure": 0.3085528535031243, "rougeL_fmeasure_stderr": 0.0015558243556986532, "rougeL_precision": 0.2708392675295398, "rougeL_precision_stderr": 0.0015293358001324147, "rougeL_recall": 0.3866756916075708, "rougeL_recall_stderr": 0.0024884834616842755, "rougeLsum_fmeasure": 0.35353842822798887, "rougeLsum_fmeasure_stderr": 0.001886743880999753, "rougeLsum_precision": 0.31184277536230626, "rougeLsum_precision_stderr": 0.0018957620411070345, "rougeLsum_recall": 0.43875485442059636, "rougeLsum_recall_stderr": 0.002710851391374029}}, "3": {"generate_text_restaurant": {"bleu": 10.189563951094414, "bleu_stderr": 0.09427499367940084, "rouge1_fmeasure": 0.42389949583381237, "rouge1_fmeasure_stderr": 0.0018605488068051588, "rouge1_precision": 0.36817190833860697, "rouge1_precision_stderr": 0.0019656678844549645, "rouge1_recall": 0.5363504766997446, "rouge1_recall_stderr": 0.0027352574830906383, "rouge2_fmeasure": 0.20474913748031123, "rouge2_fmeasure_stderr": 0.0016798602044551796, "rouge2_precision": 0.17677690202821514, "rouge2_precision_stderr": 0.0015500577175042321, "rouge2_recall": 0.2630023508064778, "rouge2_recall_stderr": 0.0023343418511276868, "rougeL_fmeasure": 0.3118160024380036, "rougeL_fmeasure_stderr": 0.0015347599766876738, "rougeL_precision": 0.26924224328922214, "rougeL_precision_stderr": 0.0014968175338731645, "rougeL_recall": 0.3991179057569062, "rougeL_recall_stderr": 0.002484404009518406, "rougeLsum_fmeasure": 0.3544946381824597, "rougeLsum_fmeasure_stderr": 0.0019027110628313127, "rougeLsum_precision": 0.30800886875805217, "rougeLsum_precision_stderr": 0.0019097659829946047, "rougeLsum_recall": 0.4483937574374488, "rougeLsum_recall_stderr": 0.0026777107579305293}}, "4": {"generate_text_restaurant": {"bleu": 10.087905126304989, "bleu_stderr": 0.12083096625574397, "rouge1_fmeasure": 0.4212315759756044, "rouge1_fmeasure_stderr": 0.0018415320213085158, "rouge1_precision": 0.363770828222443, "rouge1_precision_stderr": 0.0020036630555845163, "rouge1_recall": 0.5387177659214019, "rouge1_recall_stderr": 0.002646963846141895, "rouge2_fmeasure": 0.20471927374375903, "rouge2_fmeasure_stderr": 0.0016719159604382972, "rouge2_precision": 0.17597560551882171, "rouge2_precision_stderr": 0.001571481262205388, "rouge2_recall": 0.2657691623220242, "rouge2_recall_stderr": 0.002333604850349637, "rougeL_fmeasure": 0.3130529200481379, "rougeL_fmeasure_stderr": 0.0015386652999892186, "rougeL_precision": 0.26877802100515913, "rougeL_precision_stderr": 0.0015442331987074057, "rougeL_recall": 0.4052364760809525, "rougeL_recall_stderr": 0.0024774430086498, "rougeLsum_fmeasure": 0.3523313968378458, "rougeLsum_fmeasure_stderr": 0.001878784755073219, "rougeLsum_precision": 0.3043335333487051, "rougeLsum_precision_stderr": 0.0019221279858750424, "rougeLsum_recall": 0.45057659617077206, "rougeLsum_recall_stderr": 0.0026276807170486868}}, "5": {"generate_text_restaurant": {"bleu": 9.819773449865215, "bleu_stderr": 0.11548211534367327, "rouge1_fmeasure": 0.4171512639820159, "rouge1_fmeasure_stderr": 0.0018457472467404608, "rouge1_precision": 0.35770393515726967, "rouge1_precision_stderr": 0.0020171246319611895, "rouge1_recall": 0.539688571960094, "rouge1_recall_stderr": 0.002611808763994368, "rouge2_fmeasure": 0.20263412608550646, "rouge2_fmeasure_stderr": 0.0016590004976459545, "rouge2_precision": 0.1728779412674664, "rouge2_precision_stderr": 0.001552564907957209, "rouge2_recall": 0.26604296450461634, "rouge2_recall_stderr": 0.002299997324816336, "rougeL_fmeasure": 0.3113829743853015, "rougeL_fmeasure_stderr": 0.001519582979021018, "rougeL_precision": 0.2650100444015115, "rougeL_precision_stderr": 0.0015203204596296734, "rougeL_recall": 0.4087745798701937, "rougeL_recall_stderr": 0.0024798181425658206, "rougeLsum_fmeasure": 0.34979542583413165, "rougeLsum_fmeasure_stderr": 0.0018887263526221952, "rougeLsum_precision": 0.3001431880229394, "rougeLsum_precision_stderr": 0.0019447092940593606, "rougeLsum_recall": 0.4521649916174878, "rougeLsum_recall_stderr": 0.0025893467916896255}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 2.660236171533595, "bleu_stderr": 0.13058979906246349, "rouge1_fmeasure": 0.22742139033510708, "rouge1_fmeasure_stderr": 0.0026114290632041296, "rouge1_precision": 0.1640429927941635, "rouge1_precision_stderr": 0.002015041956665472, "rouge1_recall": 0.39341008253163384, "rouge1_recall_stderr": 0.004636047629579328, "rouge2_fmeasure": 0.060906753315850805, "rouge2_fmeasure_stderr": 0.0018640421740763493, "rouge2_precision": 0.043275784885704716, "rouge2_precision_stderr": 0.001336391186147405, "rouge2_recall": 0.10866533118072726, "rouge2_recall_stderr": 0.003402923147178015, "rougeL_fmeasure": 0.17023515816181556, "rougeL_fmeasure_stderr": 0.002061842004493983, "rougeL_precision": 0.12259957209931875, "rougeL_precision_stderr": 0.0015710911079803718, "rougeL_recall": 0.2958746933623946, "rougeL_recall_stderr": 0.0037695130327974375, "rougeLsum_fmeasure": 0.1796488354953124, "rougeLsum_fmeasure_stderr": 0.00228884690406039, "rougeLsum_precision": 0.12928277474166358, "rougeLsum_precision_stderr": 0.0017231366356998507, "rougeLsum_recall": 0.31266179796436216, "rougeLsum_recall_stderr": 0.004192600769757827}}, "1": {"article_DOC_summary": {"bleu": 1.8314489114971673, "bleu_stderr": 0.05409360756715158, "rouge1_fmeasure": 0.19173204586716094, "rouge1_fmeasure_stderr": 0.002639692576988235, "rouge1_precision": 0.13655953349875768, "rouge1_precision_stderr": 0.0019656757175551577, "rouge1_recall": 0.3351266289452704, "rouge1_recall_stderr": 0.0045506786659058425, "rouge2_fmeasure": 0.043509124193048414, "rouge2_fmeasure_stderr": 0.0016166226125424621, "rouge2_precision": 0.030642922658876727, "rouge2_precision_stderr": 0.001142930640307047, "rouge2_recall": 0.07840904303144859, "rouge2_recall_stderr": 0.003007597927500564, "rougeL_fmeasure": 0.1479712469429257, "rougeL_fmeasure_stderr": 0.002021808544636796, "rougeL_precision": 0.10515008122500365, "rougeL_precision_stderr": 0.0014878667949867396, "rougeL_recall": 0.2603291047174206, "rougeL_recall_stderr": 0.0036304258303670577, "rougeLsum_fmeasure": 0.15295586712265904, "rougeLsum_fmeasure_stderr": 0.0022389650188625095, "rougeLsum_precision": 0.10864088340556836, "rougeLsum_precision_stderr": 0.0016392672849460208, "rougeLsum_recall": 0.26926885885160073, "rougeLsum_recall_stderr": 0.004010439316574938}}, "2": {"article_DOC_summary": {"bleu": 2.3328395887156255, "bleu_stderr": 0.10816864533274577, "rouge1_fmeasure": 0.20690402045254036, "rouge1_fmeasure_stderr": 0.0027232531224059726, "rouge1_precision": 0.14713632905683816, "rouge1_precision_stderr": 0.0020215279085607075, "rouge1_recall": 0.3625697155906082, "rouge1_recall_stderr": 0.004676136272316295, "rouge2_fmeasure": 0.05427974421404337, "rouge2_fmeasure_stderr": 0.0017696178332732388, "rouge2_precision": 0.03817870519938818, "rouge2_precision_stderr": 0.0012523560434755418, "rouge2_recall": 0.09803539445968558, "rouge2_recall_stderr": 0.003268351761516068, "rougeL_fmeasure": 0.16250269839239403, "rougeL_fmeasure_stderr": 0.0021208807462068965, "rougeL_precision": 0.11527249431509622, "rougeL_precision_stderr": 0.0015525999339183435, "rougeL_recall": 0.28699007838315493, "rougeL_recall_stderr": 0.003846678173723021, "rougeLsum_fmeasure": 0.16643954154187726, "rougeLsum_fmeasure_stderr": 0.0023327546790871127, "rougeLsum_precision": 0.11807937715144022, "rougeLsum_precision_stderr": 0.0017041758270634855, "rougeLsum_recall": 0.2936525618807245, "rougeLsum_recall_stderr": 0.004161272394841371}}, "3": {"article_DOC_summary": {"bleu": 2.4415835212159744, "bleu_stderr": 0.09205861798425756, "rouge1_fmeasure": 0.20790416673134823, "rouge1_fmeasure_stderr": 0.0027904145464295793, "rouge1_precision": 0.15050477129545162, "rouge1_precision_stderr": 0.0021650717494144725, "rouge1_recall": 0.35833402440286477, "rouge1_recall_stderr": 0.004875928275119348, "rouge2_fmeasure": 0.05578255582904122, "rouge2_fmeasure_stderr": 0.0017532403099110591, "rouge2_precision": 0.03967555502420575, "rouge2_precision_stderr": 0.001257612569171145, "rouge2_recall": 0.0995195510617509, "rouge2_recall_stderr": 0.0032409089175577817, "rougeL_fmeasure": 0.1588561646225248, "rougeL_fmeasure_stderr": 0.00218993426182156, "rougeL_precision": 0.11465132801092887, "rougeL_precision_stderr": 0.001666726965584784, "rougeL_recall": 0.27592762804713344, "rougeL_recall_stderr": 0.004014306258609167, "rougeLsum_fmeasure": 0.1652197759476937, "rougeLsum_fmeasure_stderr": 0.002376223870136714, "rougeLsum_precision": 0.11933090671402025, "rougeLsum_precision_stderr": 0.0018043535233345025, "rougeLsum_recall": 0.28657490925873824, "rougeLsum_recall_stderr": 0.004297538545009866}}, "4": {"article_DOC_summary": {"bleu": 1.0886930800027617, "bleu_stderr": 0.13685163756146332, "rouge1_fmeasure": 0.05651233636898341, "rouge1_fmeasure_stderr": 0.0030905378199610973, "rouge1_precision": 0.04479833541552641, "rouge1_precision_stderr": 0.0024920338028095244, "rouge1_recall": 0.09115873549013587, "rouge1_recall_stderr": 0.0051576903382473185, "rouge2_fmeasure": 0.014022544687027934, "rouge2_fmeasure_stderr": 0.0011421600358457382, "rouge2_precision": 0.010549200347804146, "rouge2_precision_stderr": 0.0008786327894641382, "rouge2_recall": 0.024034279053592206, "rouge2_recall_stderr": 0.0020028495235980617, "rougeL_fmeasure": 0.04286140894694921, "rougeL_fmeasure_stderr": 0.002370447391151435, "rougeL_precision": 0.03426844067651488, "rougeL_precision_stderr": 0.0019596209525071014, "rougeL_recall": 0.06925411151199752, "rougeL_recall_stderr": 0.003988959935482703, "rougeLsum_fmeasure": 0.045863687869416005, "rougeLsum_fmeasure_stderr": 0.0025272572767658435, "rougeLsum_precision": 0.036540645050964785, "rougeLsum_precision_stderr": 0.0020678075006755215, "rougeLsum_recall": 0.07413354565538481, "rougeLsum_recall_stderr": 0.004252794848785553}}, "5": {"article_DOC_summary": {"bleu": 2.170644306962683e-19, "bleu_stderr": 5.499005165072242e-15, "rouge1_fmeasure": 0.0030531767218957887, "rouge1_fmeasure_stderr": 0.0008235812485186412, "rouge1_precision": 0.0027738518451789936, "rouge1_precision_stderr": 0.0007772852422880741, "rouge1_recall": 0.004449774503151586, "rouge1_recall_stderr": 0.0013000230402188415, "rouge2_fmeasure": 0.000461932013569787, "rouge2_fmeasure_stderr": 0.00019747062019636368, "rouge2_precision": 0.0004318387593770127, "rouge2_precision_stderr": 0.0002096018140886139, "rouge2_recall": 0.0006493854199619724, "rouge2_recall_stderr": 0.0002769463291779765, "rougeL_fmeasure": 0.00206141715660179, "rougeL_fmeasure_stderr": 0.0005827251790517291, "rougeL_precision": 0.0018680723098669894, "rougeL_precision_stderr": 0.0005358076043197855, "rougeL_recall": 0.0029818697516115103, "rougeL_recall_stderr": 0.0008929801630497813, "rougeLsum_fmeasure": 0.002323102977881885, "rougeLsum_fmeasure_stderr": 0.0006354398848530671, "rougeLsum_precision": 0.002156304889205613, "rougeLsum_precision_stderr": 0.0006383957765946712, "rougeLsum_recall": 0.003337441465976244, "rougeLsum_recall_stderr": 0.0009699711366361862}}}} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_0.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..37d0cac1c7d9f0112de36fafdac3ce6f512a79d4 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.4274462846584994, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.026506045585829644 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.0743940764798566, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0014865309649260369 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.32181178468311483, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0046779793640138664 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.11401591584123742, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0020018749467427548 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.035398271042759596, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0009285365780968767 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.15703798625688678, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0033071200278070876 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.05433244012190525, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0012833100172831976 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.07120802247039631, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001362906540968755 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.31125592253868395, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004548153090547622 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.10946929754745122, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0018607174533106234 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.07100637180754554, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.001398755345483045 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.3067417251277733, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004374950039871412 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.10881188996025698, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0018835358686024326 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_1.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..00aece6d407df11f75ff90b2b142b11bb9932ba3 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.5086086062549235, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.024148152161397957 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.07420670646576108, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0012994585087623457 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.3699833502592873, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.005221535560959868 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.11613651882382699, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0017869903586088247 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.03528298594442465, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0008131219269542471 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.18919504627943293, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.003948470106675628 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.05556040217298211, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0011573526331625286 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.07064592165677544, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011911523984872805 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.3529817507096153, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.00491517289541728 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.11069330166344538, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0016501808624972724 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.07063890112491973, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0012189437083937394 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.35151452415262047, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004837998006757547 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.11048988644232176, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001670545364215763 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_2.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f143ecb6a3dd2bed73a0e4e5b2ec2dca4684a36d --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.6056899396941058, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.031095911436243143 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.07171720323593823, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0013407577731600893 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.3940444073904271, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.005453929450001824 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.11269434916911006, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016709087363002723 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.03342565708524102, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007679332484666479 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.20366528828334413, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.004145286907818467 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.05325348987984883, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0010824823620329564 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.06654587012077771, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012126902994658729 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.3626163032278441, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004808677935098044 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.10449422734114891, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014912988107590758 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.06809528741035778, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0012705443708076406 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.3717881435944756, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.005017031823311842 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.1068591227441311, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015708609058507584 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_3.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..de98df32cc45602e80cdfbbac7b685acbe2fa32c --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.7043691445234566, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.033436352647492236 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.06945197089001147, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0011528009544658436 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.40339819978438163, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.005565480395978604 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.11118267231139893, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016217396109271171 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.03230783770657853, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007067527472951313 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.20764571809595184, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.004196897185127135 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.05211591902402141, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0010331141960554706 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.0633620924773605, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010232470500386361 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.3638284378001932, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004793212255287554 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.10117012026750102, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014209168284038964 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.06592327408520764, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010958929866110073 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.37984454029428794, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.005122585721030877 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.1053138951283582, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001529640334300114 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_4.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..65a77d613bbc37e0a170098c193f4626da2e2e3e --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.7237940725363642, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.05068347700106321 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.06842641677261545, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0011165170819324747 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.406375539617853, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.00550424978526618 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.10982478834712758, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015663207694755273 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.031843816337460154, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006802306529242348 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.2101465380791208, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.004148287013449519 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.05155868190087433, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009911919774912529 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.06196757937456414, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0009925820821838872 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.36309144001642407, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004660726913536042 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.09912881209100917, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001366723974954788 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.06489771066521006, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010636680077013658 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.3825474992320362, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.005083374729929655 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.10396329790700495, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014800754096392262 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_5.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..7cd0610e44134db18db448d0703ee5aad67919cf --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.8235132351061684, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.03673652426206423 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.06807661053530396, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0010608834381468243 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.41555132879030265, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.005712806441867834 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.11000005624917578, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001511602784285709 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.03190049650831127, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006496066520260159 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.21868118098088812, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.004351358375129378 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.05209703492451165, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000966760303297213 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.061096978042372, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0009419140672594094 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.3679922650615108, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004831134262904494 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.09833778576062344, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013130705666902862 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.06407881465692666, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010057526623759078 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.38744747681119496, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.00519938044209454 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.10329798028758783, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014185087292018597 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_0.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..00e13584a47b5aaa1eb2ff0d93f3a9dd02f40592 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.141621308728508, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001852197455721959 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.2487835153814821, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002883968433439267 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.16789045871402775, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001899331826630772 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.028274308481376006, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007638689773747983 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.05312688704536566, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.001543560570845198 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.033980155567562176, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0008772611553649017 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.10723271379275104, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012832871011880624 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.19569867076577613, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.002336256705578366 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.12866222215650755, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013553903517883699 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.1310312538372522, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0016949440714634507 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.2313303827432569, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0026938189641655444 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.15551623162738257, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0017397466278372554 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 1.607743073760489, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.0505448817544192 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_1.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f80eeea7da2a1f72b0c1a37fe7dec585684cffc3 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.20162081804133491, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0023006404028398596 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.3302113914143275, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0029253291213495436 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.22971432102282577, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.002024992641113895 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.05353641148052063, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0011596978277920967 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.0901699660110981, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0018295391165180175 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.06075703698527234, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0011280888351149878 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.14458374147535075, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001635799675433344 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.24465580855285374, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0024107233856668618 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.16594057607108714, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014257612958005858 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.18996108622649027, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.002170230659407399 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.31222699454801467, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0027814459127952855 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.21655317006571786, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001899322620870555 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 3.247891638911873, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.08372328781084526 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_2.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..79220d64b6d9e9304ec86608982936acab70f7ac --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.22464896032753767, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.00267471320719044 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.3334397692148636, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0029039999435442228 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.24066179288126918, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0020250503611946593 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.06367029131472855, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0013893020454497178 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.09620481258465774, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0019131250126952382 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.0672929256474048, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0011920824100654986 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.16413395555652943, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0019891543999868556 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.250622872215609, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0024435750314427333 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.17657109086645734, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001468075623241685 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.21176073950316202, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0025334766675070507 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.31521561952071925, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0027809099107783215 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.2268588669501038, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019039387353264705 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 3.7546658955082104, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.05106004200886398 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_3.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..5ce6d743e30437ef7fff40a404a72379b610dc48 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.19547574181004312, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.003050688739798117 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.27599613346113083, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003547296490903856 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.1999944588590951, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0024431054075354647 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.05556100650606968, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0015316523441232619 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.07900668123150884, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.001936538574501469 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.055502076840201975, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0012165658097950253 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.14441472671608785, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0023810920879014986 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.20695094754990062, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0028534088606798126 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.14698550688574344, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0018099945049349275 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.18528466682755065, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.002913994201194578 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.2616826664782059, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0033798208398086076 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.1893838880235665, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0023154515872413177 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 3.693352277729935, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.09530601369536958 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_4.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..21af6499f985734c93e436c206508efc5eb18d5b --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.063039334135716, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.00237217321150148 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.09017328403874066, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003175334226857727 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.06295072251821578, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0021227903441220584 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.01726942511454547, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.001004417819364943 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.026632929529636855, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0014520619320699361 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.01740325470887385, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0008413142438273321 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.048037538244205064, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0018761060154017623 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.06863572497099125, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.002484143953625902 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.04720899567270062, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015949102808424699 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.059620692839723696, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0022577542164374776 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.08531399766560527, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.00302563434718094 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.05941144209472935, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020085067689640797 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.7011252343751181, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.054776720182795745 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_5.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0537fab77c5452cb225e10d3654813595cef3857 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.011023013045319767, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0011239191554005244 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.01576861876664237, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.001514110135289748 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.010948945347352195, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0010303712513734353 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.0030642982687765735, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00043761754647970557 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.0048436181769985496, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0007069513940476318 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.003081959728364898, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0003958924398148921 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.008303485879920013, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.000873077615735369 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.011933114229936235, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0011773390443061134 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.008113230483687816, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.000757965684491531 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.010476549059782741, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010807405840856448 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.015045032229734936, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0014570036926186405 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.010379882751860353, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0009814470061507048 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 6.442668131454499e-07, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 1.6780002001919048e-06 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_0.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0e8ff2916238b5e429b1e77eaf6d98151c7dfa --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 1.2617835680167517, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.10635778923661202 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.11745138823891885, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0015011277238236715 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.22985234642531047, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002121425510390003 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.14934980508808265, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0016043096357849695 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.022449491334163164, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0007873950105451982 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.041963515173589365, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0013675959887181714 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.027899167959252163, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0009510765177625647 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.1035383301816468, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0010714123610581939 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.21015803872628752, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0017418540261349958 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.13395735123983762, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.00119951498857243 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.10678429826736537, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0013783155650794233 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.20841519216554769, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0019094622189363957 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.13550871061320946, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001452232590175344 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_1.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f4ed281ccf22ba12e96ac60f5ac58f9a2156bce1 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 8.028465709884875, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.10414333335862284 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.33860487694185304, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.002059676862058819 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.5051950548626472, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.00294388539222501 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.39099089747429966, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0019423224150055892 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.14891190026236875, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0014722962919980933 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.2273413357531751, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.002240835952327236 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.17313651053671014, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0015844557666629471 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.24778622071466067, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.001542767658539951 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.3751611447552265, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0024874116332485865 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.2875433639233825, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0015221333146636332 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.2789653525284207, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.001973666047446342 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.4143210047809657, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002765128231116627 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.3214385233916606, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001926585809990621 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_2.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..7bebb78360bdea8e28bfdee78222fb393b0adcac --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 10.118135078197039, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.09810052501619766 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.37391944585631076, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0019127935246138258 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.5276735152486758, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0028083305880520666 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.42448550544175645, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.001825472888691242 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.17745705101705952, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0015612598687988076 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.25548983218564925, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0023268496099242975 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.20250984831545202, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016721804731251128 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.2708392675295398, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0015293358001324147 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.3866756916075708, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0024884834616842755 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.3085528535031243, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0015558243556986532 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.31184277536230626, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0018957620411070345 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.43875485442059636, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002710851391374029 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.35353842822798887, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001886743880999753 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_3.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..d0b64f16d4056536d8608d89394f1b238028ed0a --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 10.189563951094414, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.09427499367940084 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.36817190833860697, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0019656678844549645 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.5363504766997446, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0027352574830906383 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.42389949583381237, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0018605488068051588 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.17677690202821514, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0015500577175042321 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.2630023508064778, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0023343418511276868 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.20474913748031123, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016798602044551796 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.26924224328922214, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0014968175338731645 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.3991179057569062, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002484404009518406 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.3118160024380036, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0015347599766876738 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.30800886875805217, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0019097659829946047 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.4483937574374488, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0026777107579305293 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.3544946381824597, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019027110628313127 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_4.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..041706fd06ca92146d1b92d8303b050e05492430 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 10.087905126304989, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.12083096625574397 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.363770828222443, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0020036630555845163 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.5387177659214019, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002646963846141895 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.4212315759756044, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0018415320213085158 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.17597560551882171, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.001571481262205388 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.2657691623220242, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.002333604850349637 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.20471927374375903, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016719159604382972 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.26877802100515913, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0015442331987074057 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.4052364760809525, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0024774430086498 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.3130529200481379, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0015386652999892186 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3043335333487051, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0019221279858750424 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.45057659617077206, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0026276807170486868 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.3523313968378458, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001878784755073219 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_5.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..91d6e679fb68498fe7f1d72263d0871c558ca0d0 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 9.819773449865215, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.11548211534367327 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.35770393515726967, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0020171246319611895 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.539688571960094, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002611808763994368 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.4171512639820159, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0018457472467404608 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.1728779412674664, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.001552564907957209 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.26604296450461634, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.002299997324816336 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.20263412608550646, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016590004976459545 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.2650100444015115, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0015203204596296734 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.4087745798701937, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0024798181425658206 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.3113829743853015, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.001519582979021018 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3001431880229394, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0019447092940593606 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.4521649916174878, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0025893467916896255 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.34979542583413165, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0018887263526221952 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_0.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..c1e1d4483d3aad331c21c3b5366592db7360c8b0 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.1640429927941635, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.002015041956665472 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.39341008253163384, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.004636047629579328 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.22742139033510708, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0026114290632041296 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.043275784885704716, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.001336391186147405 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.10866533118072726, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.003402923147178015 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.060906753315850805, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0018640421740763493 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.12259957209931875, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0015710911079803718 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.2958746933623946, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0037695130327974375 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.17023515816181556, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.002061842004493983 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.12928277474166358, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0017231366356998507 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.31266179796436216, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.004192600769757827 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.1796488354953124, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.00228884690406039 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 2.660236171533595, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.13058979906246349 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_1.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..bed635092d1ccca5d5ad8cff06fe33a37183df4f --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.13655953349875768, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0019656757175551577 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.3351266289452704, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0045506786659058425 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.19173204586716094, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002639692576988235 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.030642922658876727, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.001142930640307047 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.07840904303144859, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.003007597927500564 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.043509124193048414, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0016166226125424621 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.10515008122500365, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0014878667949867396 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.2603291047174206, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0036304258303670577 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.1479712469429257, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.002021808544636796 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.10864088340556836, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0016392672849460208 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.26926885885160073, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.004010439316574938 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.15295586712265904, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0022389650188625095 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 1.8314489114971673, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.05409360756715158 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_2.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..98db468262337f7a36578593432df2350606a825 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.14713632905683816, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0020215279085607075 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.3625697155906082, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.004676136272316295 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.20690402045254036, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0027232531224059726 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.03817870519938818, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0012523560434755418 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.09803539445968558, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.003268351761516068 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.05427974421404337, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0017696178332732388 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.11527249431509622, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0015525999339183435 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.28699007838315493, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.003846678173723021 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.16250269839239403, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0021208807462068965 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.11807937715144022, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0017041758270634855 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.2936525618807245, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.004161272394841371 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.16643954154187726, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0023327546790871127 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 2.3328395887156255, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.10816864533274577 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_3.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..686002f7a4b54cb2244fe6ba7a957a28bea43b13 --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.15050477129545162, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0021650717494144725 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.35833402440286477, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.004875928275119348 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.20790416673134823, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0027904145464295793 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.03967555502420575, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.001257612569171145 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.0995195510617509, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0032409089175577817 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.05578255582904122, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0017532403099110591 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.11465132801092887, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.001666726965584784 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.27592762804713344, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.004014306258609167 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.1588561646225248, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.00218993426182156 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.11933090671402025, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0018043535233345025 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.28657490925873824, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.004297538545009866 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.1652197759476937, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.002376223870136714 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 2.4415835212159744, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.09205861798425756 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_4.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b60b149352ad2c4f62f07696d3c173248b64a06b --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.04479833541552641, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0024920338028095244 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.09115873549013587, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0051576903382473185 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.05651233636898341, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0030905378199610973 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.010549200347804146, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0008786327894641382 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.024034279053592206, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0020028495235980617 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.014022544687027934, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0011421600358457382 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.03426844067651488, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0019596209525071014 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.06925411151199752, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.003988959935482703 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.04286140894694921, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.002370447391151435 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.036540645050964785, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0020678075006755215 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.07413354565538481, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.004252794848785553 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.045863687869416005, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0025272572767658435 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 1.0886930800027617, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.13685163756146332 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_5.json b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..7f6bc0ce5a4c107b68fdaf2908a2a58e57d58cbe --- /dev/null +++ b/8b7178b25bopt/evaluation/generation/slim.8b7178b25bopt_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.0027738518451789936, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0007772852422880741 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.004449774503151586, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0013000230402188415 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.0030531767218957887, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0008235812485186412 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0004318387593770127, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0002096018140886139 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.0006493854199619724, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0002769463291779765 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.000461932013569787, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.00019747062019636368 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.0018680723098669894, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0005358076043197855 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.0029818697516115103, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0008929801630497813 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.00206141715660179, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0005827251790517291 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.002156304889205613, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0006383957765946712 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.003337441465976244, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0009699711366361862 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.002323102977881885, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0006354398848530671 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 2.170644306962683e-19, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 5.499005165072242e-15 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b25bopt/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 8, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_0.csv b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_0.csv new file mode 100644 index 0000000000000000000000000000000000000000..bc0c03e22a9d875337f8daa5f1bf6316938eba1a --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_0.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.33,0.014876872027456738,0 +anli_r2,acc,0.344,0.015029633724408943,0 +anli_r3,acc,0.3616666666666667,0.013876131663123877,0 +arc_challenge,acc,0.3250853242320819,0.013688147309729119,0 +arc_challenge,acc_norm,0.34215017064846415,0.013864152159177275,0 +arc_easy,acc,0.6654040404040404,0.009682137724327907,0 +arc_easy,acc_norm,0.5997474747474747,0.010053550119896138,0 +boolq,acc,0.6351681957186545,0.008419440984963646,1 +cb,acc,0.25,0.058387420812114225,1 +cb,f1,0.1693693693693694,,1 +copa,acc,0.83,0.03775251680686371,0 +hellaswag,acc,0.5343557060346544,0.004977988452502641,0 +hellaswag,acc_norm,0.7091216889065923,0.004532393111248685,0 +piqa,acc,0.7676822633297062,0.009853201384168241,0 +piqa,acc_norm,0.7823721436343852,0.00962740747484087,0 +rte,acc,0.5667870036101083,0.02982676408213827,0 +sciq,acc,0.894,0.009739551265785138,0 +sciq,acc_norm,0.822,0.012102167676183589,0 +storycloze_2016,acc,0.7573490112239444,0.009913300265342059,0 +winogrande,acc,0.6250986582478295,0.013605544523788008,0 diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_0.json b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..73ea1e90bd7805da14f1308813b1555af1ed96cf --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_0.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.33, + "acc_stderr": 0.014876872027456738 + }, + "anli_r2": { + "acc": 0.344, + "acc_stderr": 0.015029633724408943 + }, + "anli_r3": { + "acc": 0.3616666666666667, + "acc_stderr": 0.013876131663123877 + }, + "cb": { + "acc": 0.25, + "acc_stderr": 0.058387420812114225, + "f1": 0.1693693693693694 + }, + "copa": { + "acc": 0.83, + "acc_stderr": 0.03775251680686371 + }, + "hellaswag": { + "acc": 0.5343557060346544, + "acc_stderr": 0.004977988452502641, + "acc_norm": 0.7091216889065923, + "acc_norm_stderr": 0.004532393111248685 + }, + "rte": { + "acc": 0.5667870036101083, + "acc_stderr": 0.02982676408213827 + }, + "winogrande": { + "acc": 0.6250986582478295, + "acc_stderr": 0.013605544523788008 + }, + "storycloze_2016": { + "acc": 0.7573490112239444, + "acc_stderr": 0.009913300265342059 + }, + "boolq": { + "acc": 0.6351681957186545, + "acc_stderr": 0.008419440984963646 + }, + "arc_easy": { + "acc": 0.6654040404040404, + "acc_stderr": 0.009682137724327907, + "acc_norm": 0.5997474747474747, + "acc_norm_stderr": 0.010053550119896138 + }, + "arc_challenge": { + "acc": 0.3250853242320819, + "acc_stderr": 0.013688147309729119, + "acc_norm": 0.34215017064846415, + "acc_norm_stderr": 0.013864152159177275 + }, + "sciq": { + "acc": 0.894, + "acc_stderr": 0.009739551265785138, + "acc_norm": 0.822, + "acc_norm_stderr": 0.012102167676183589 + }, + "piqa": { + "acc": 0.7676822633297062, + "acc_stderr": 0.009853201384168241, + "acc_norm": 0.7823721436343852, + "acc_norm_stderr": 0.00962740747484087 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_1.csv b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_1.csv new file mode 100644 index 0000000000000000000000000000000000000000..21e95110799b7591d9213812dcd5c998aded361d --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_1.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.319,0.014746404865473479,0 +anli_r2,acc,0.318,0.0147340793093119,0 +anli_r3,acc,0.3458333333333333,0.013736245342311012,0 +arc_challenge,acc,0.34215017064846415,0.013864152159177278,0 +arc_challenge,acc_norm,0.35494880546075086,0.013983036904094095,0 +arc_easy,acc,0.6704545454545454,0.009645184190953855,0 +arc_easy,acc_norm,0.6439393939393939,0.009825454608416303,0 +boolq,acc,0.6461773700305811,0.008362983020904465,1 +cb,acc,0.4642857142857143,0.06724777654937658,1 +cb,f1,0.31876138433515483,,1 +copa,acc,0.81,0.03942772444036623,0 +hellaswag,acc,0.533559051981677,0.0049785296421409365,0 +hellaswag,acc_norm,0.7054371639115714,0.004549143750428458,0 +piqa,acc,0.7736670293797606,0.009763294246879425,0 +piqa,acc_norm,0.7823721436343852,0.009627407474840869,0 +rte,acc,0.5523465703971119,0.02993107036293953,0 +sciq,acc,0.928,0.008178195576218681,0 +sciq,acc_norm,0.915,0.008823426366942317,0 +storycloze_2016,acc,0.7455905932656334,0.010071542492663043,0 +winogrande,acc,0.6179952644041041,0.013655578215970422,0 diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_1.json b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..4e40f45c444f067af0b31c27dda2646bda3f4c59 --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_1.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.319, + "acc_stderr": 0.014746404865473479 + }, + "anli_r2": { + "acc": 0.318, + "acc_stderr": 0.0147340793093119 + }, + "anli_r3": { + "acc": 0.3458333333333333, + "acc_stderr": 0.013736245342311012 + }, + "cb": { + "acc": 0.4642857142857143, + "acc_stderr": 0.06724777654937658, + "f1": 0.31876138433515483 + }, + "copa": { + "acc": 0.81, + "acc_stderr": 0.03942772444036623 + }, + "hellaswag": { + "acc": 0.533559051981677, + "acc_stderr": 0.0049785296421409365, + "acc_norm": 0.7054371639115714, + "acc_norm_stderr": 0.004549143750428458 + }, + "rte": { + "acc": 0.5523465703971119, + "acc_stderr": 0.02993107036293953 + }, + "winogrande": { + "acc": 0.6179952644041041, + "acc_stderr": 0.013655578215970422 + }, + "storycloze_2016": { + "acc": 0.7455905932656334, + "acc_stderr": 0.010071542492663043 + }, + "boolq": { + "acc": 0.6461773700305811, + "acc_stderr": 0.008362983020904465 + }, + "arc_easy": { + "acc": 0.6704545454545454, + "acc_stderr": 0.009645184190953855, + "acc_norm": 0.6439393939393939, + "acc_norm_stderr": 0.009825454608416303 + }, + "arc_challenge": { + "acc": 0.34215017064846415, + "acc_stderr": 0.013864152159177278, + "acc_norm": 0.35494880546075086, + "acc_norm_stderr": 0.013983036904094095 + }, + "sciq": { + "acc": 0.928, + "acc_stderr": 0.008178195576218681, + "acc_norm": 0.915, + "acc_norm_stderr": 0.008823426366942317 + }, + "piqa": { + "acc": 0.7736670293797606, + "acc_stderr": 0.009763294246879425, + "acc_norm": 0.7823721436343852, + "acc_norm_stderr": 0.009627407474840869 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_2.csv b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_2.csv new file mode 100644 index 0000000000000000000000000000000000000000..0c127f637c1f203e12e618a79d52c41ed13172bf --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_2.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.316,0.014709193056057107,0 +anli_r2,acc,0.332,0.014899597242811478,0 +anli_r3,acc,0.34,0.013680495725767803,0 +arc_challenge,acc,0.34897610921501704,0.013928933461382497,0 +arc_challenge,acc_norm,0.36860068259385664,0.014097810678042184,0 +arc_easy,acc,0.6792929292929293,0.00957747457110883,0 +arc_easy,acc_norm,0.6670875420875421,0.009669958978395326,0 +boolq,acc,0.6516819571865443,0.008332942286688303,1 +cb,acc,0.30357142857142855,0.06199938655510754,1 +cb,f1,0.2236842105263158,,1 +copa,acc,0.84,0.03684529491774709,0 +hellaswag,acc,0.5338577972515435,0.004978328190775526,0 +hellaswag,acc_norm,0.7099183429595698,0.004528723951878254,0 +piqa,acc,0.7687704026115343,0.00983706318062533,0 +piqa,acc_norm,0.7829162132752993,0.009618708415756785,0 +rte,acc,0.5415162454873647,0.029992535385373314,0 +sciq,acc,0.934,0.007855297938697587,0 +sciq,acc_norm,0.925,0.008333333333333326,0 +storycloze_2016,acc,0.7600213789417424,0.009875938525582594,0 +winogrande,acc,0.6369376479873717,0.013515191866479221,0 diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_2.json b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..d36aad1f939ed66724dced22642bc1bd44dc784e --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_2.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.316, + "acc_stderr": 0.014709193056057107 + }, + "anli_r2": { + "acc": 0.332, + "acc_stderr": 0.014899597242811478 + }, + "anli_r3": { + "acc": 0.34, + "acc_stderr": 0.013680495725767803 + }, + "cb": { + "acc": 0.30357142857142855, + "acc_stderr": 0.06199938655510754, + "f1": 0.2236842105263158 + }, + "copa": { + "acc": 0.84, + "acc_stderr": 0.03684529491774709 + }, + "hellaswag": { + "acc": 0.5338577972515435, + "acc_stderr": 0.004978328190775526, + "acc_norm": 0.7099183429595698, + "acc_norm_stderr": 0.004528723951878254 + }, + "rte": { + "acc": 0.5415162454873647, + "acc_stderr": 0.029992535385373314 + }, + "winogrande": { + "acc": 0.6369376479873717, + "acc_stderr": 0.013515191866479221 + }, + "storycloze_2016": { + "acc": 0.7600213789417424, + "acc_stderr": 0.009875938525582594 + }, + "boolq": { + "acc": 0.6516819571865443, + "acc_stderr": 0.008332942286688303 + }, + "arc_easy": { + "acc": 0.6792929292929293, + "acc_stderr": 0.00957747457110883, + "acc_norm": 0.6670875420875421, + "acc_norm_stderr": 0.009669958978395326 + }, + "arc_challenge": { + "acc": 0.34897610921501704, + "acc_stderr": 0.013928933461382497, + "acc_norm": 0.36860068259385664, + "acc_norm_stderr": 0.014097810678042184 + }, + "sciq": { + "acc": 0.934, + "acc_stderr": 0.007855297938697587, + "acc_norm": 0.925, + "acc_norm_stderr": 0.008333333333333326 + }, + "piqa": { + "acc": 0.7687704026115343, + "acc_stderr": 0.00983706318062533, + "acc_norm": 0.7829162132752993, + "acc_norm_stderr": 0.009618708415756785 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_3.csv b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_3.csv new file mode 100644 index 0000000000000000000000000000000000000000..5a23fbf341c609ef28f4e084a1ea8a9a3331f91c --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_3.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.317,0.014721675438880227,0 +anli_r2,acc,0.34,0.014987482264363935,0 +anli_r3,acc,0.34,0.01368049572576779,0 +arc_challenge,acc,0.3455631399317406,0.013896938461145683,0 +arc_challenge,acc_norm,0.37372013651877134,0.014137708601759095,0 +arc_easy,acc,0.6864478114478114,0.009519779157242258,0 +arc_easy,acc_norm,0.6696127946127947,0.009651430216428182,0 +boolq,acc,0.6571865443425077,0.008301676410578645,1 +cb,acc,0.39285714285714285,0.0658538889806635,1 +cb,f1,0.3421735552883094,,1 +copa,acc,0.83,0.03775251680686371,0 +hellaswag,acc,0.5375423222465644,0.004975696076240845,0 +hellaswag,acc_norm,0.7132045409281019,0.004513409114983847,0 +piqa,acc,0.7742110990206746,0.00975498067091731,0 +piqa,acc_norm,0.7840043525571273,0.009601236303553543,0 +rte,acc,0.5342960288808665,0.03002557981936643,0 +sciq,acc,0.933,0.007910345983177549,0 +sciq,acc_norm,0.93,0.0080724943583235,0 +storycloze_2016,acc,0.7669695350080171,0.00977630189854803,0 +winogrande,acc,0.6448303078137332,0.013450047479569256,0 diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_3.json b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..4fbf3022197b484e58e53b67aeecb0ca3c19e4c5 --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_3.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.317, + "acc_stderr": 0.014721675438880227 + }, + "anli_r2": { + "acc": 0.34, + "acc_stderr": 0.014987482264363935 + }, + "anli_r3": { + "acc": 0.34, + "acc_stderr": 0.01368049572576779 + }, + "cb": { + "acc": 0.39285714285714285, + "acc_stderr": 0.0658538889806635, + "f1": 0.3421735552883094 + }, + "copa": { + "acc": 0.83, + "acc_stderr": 0.03775251680686371 + }, + "hellaswag": { + "acc": 0.5375423222465644, + "acc_stderr": 0.004975696076240845, + "acc_norm": 0.7132045409281019, + "acc_norm_stderr": 0.004513409114983847 + }, + "rte": { + "acc": 0.5342960288808665, + "acc_stderr": 0.03002557981936643 + }, + "winogrande": { + "acc": 0.6448303078137332, + "acc_stderr": 0.013450047479569256 + }, + "storycloze_2016": { + "acc": 0.7669695350080171, + "acc_stderr": 0.00977630189854803 + }, + "boolq": { + "acc": 0.6571865443425077, + "acc_stderr": 0.008301676410578645 + }, + "arc_easy": { + "acc": 0.6864478114478114, + "acc_stderr": 0.009519779157242258, + "acc_norm": 0.6696127946127947, + "acc_norm_stderr": 0.009651430216428182 + }, + "arc_challenge": { + "acc": 0.3455631399317406, + "acc_stderr": 0.013896938461145683, + "acc_norm": 0.37372013651877134, + "acc_norm_stderr": 0.014137708601759095 + }, + "sciq": { + "acc": 0.933, + "acc_stderr": 0.007910345983177549, + "acc_norm": 0.93, + "acc_norm_stderr": 0.0080724943583235 + }, + "piqa": { + "acc": 0.7742110990206746, + "acc_stderr": 0.00975498067091731, + "acc_norm": 0.7840043525571273, + "acc_norm_stderr": 0.009601236303553543 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_4.csv b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_4.csv new file mode 100644 index 0000000000000000000000000000000000000000..abe100ea72e2914ec6a24b6b47bdbfa0783a2ff9 --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_4.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.342,0.015008706182121728,0 +anli_r2,acc,0.355,0.015139491543780536,0 +anli_r3,acc,0.37833333333333335,0.014005754275030409,0 +arc_challenge,acc,0.3583617747440273,0.01401288333485986,0 +arc_challenge,acc_norm,0.3728668941979522,0.014131176760131162,0 +arc_easy,acc,0.6843434343434344,0.00953701924556608,0 +arc_easy,acc_norm,0.6788720538720538,0.009580787536986797,0 +boolq,acc,0.6626911314984709,0.008269171495741622,1 +cb,acc,0.44642857142857145,0.067031892279424,1 +cb,f1,0.3196581196581197,,1 +copa,acc,0.85,0.035887028128263714,0 +hellaswag,acc,0.5375423222465644,0.004975696076240844,0 +hellaswag,acc_norm,0.7148974307906791,0.004505406176606851,0 +piqa,acc,0.7736670293797606,0.009763294246879429,0 +piqa,acc_norm,0.7894450489662677,0.009512378081238747,0 +rte,acc,0.49458483754512633,0.030094698123239966,0 +sciq,acc,0.937,0.0076870078762864245,0 +sciq,acc_norm,0.931,0.008018934050315151,0 +storycloze_2016,acc,0.7739176910742919,0.009672970735149765,0 +winogrande,acc,0.6440410418310971,0.013456740656273955,0 diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_4.json b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..533dd3db193f6694721d7886a313ca94b98124f6 --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_4.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.342, + "acc_stderr": 0.015008706182121728 + }, + "anli_r2": { + "acc": 0.355, + "acc_stderr": 0.015139491543780536 + }, + "anli_r3": { + "acc": 0.37833333333333335, + "acc_stderr": 0.014005754275030409 + }, + "cb": { + "acc": 0.44642857142857145, + "acc_stderr": 0.067031892279424, + "f1": 0.3196581196581197 + }, + "copa": { + "acc": 0.85, + "acc_stderr": 0.035887028128263714 + }, + "hellaswag": { + "acc": 0.5375423222465644, + "acc_stderr": 0.004975696076240844, + "acc_norm": 0.7148974307906791, + "acc_norm_stderr": 0.004505406176606851 + }, + "rte": { + "acc": 0.49458483754512633, + "acc_stderr": 0.030094698123239966 + }, + "winogrande": { + "acc": 0.6440410418310971, + "acc_stderr": 0.013456740656273955 + }, + "storycloze_2016": { + "acc": 0.7739176910742919, + "acc_stderr": 0.009672970735149765 + }, + "boolq": { + "acc": 0.6626911314984709, + "acc_stderr": 0.008269171495741622 + }, + "arc_easy": { + "acc": 0.6843434343434344, + "acc_stderr": 0.00953701924556608, + "acc_norm": 0.6788720538720538, + "acc_norm_stderr": 0.009580787536986797 + }, + "arc_challenge": { + "acc": 0.3583617747440273, + "acc_stderr": 0.01401288333485986, + "acc_norm": 0.3728668941979522, + "acc_norm_stderr": 0.014131176760131162 + }, + "sciq": { + "acc": 0.937, + "acc_stderr": 0.0076870078762864245, + "acc_norm": 0.931, + "acc_norm_stderr": 0.008018934050315151 + }, + "piqa": { + "acc": 0.7736670293797606, + "acc_stderr": 0.009763294246879429, + "acc_norm": 0.7894450489662677, + "acc_norm_stderr": 0.009512378081238747 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_5.csv b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_5.csv new file mode 100644 index 0000000000000000000000000000000000000000..246db1fd7688210a74ed4e7f1a0bda963591f488 --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_5.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.338,0.014965960710224489,0 +anli_r2,acc,0.352,0.015110404505648673,0 +anli_r3,acc,0.34,0.013680495725767804,0 +arc_challenge,acc,0.3515358361774744,0.013952413699600942,0 +arc_challenge,acc_norm,0.36006825938566556,0.014027516814585183,0 +arc_easy,acc,0.6898148148148148,0.009491721291998515,0 +arc_easy,acc_norm,0.6851851851851852,0.009530150430975602,0 +boolq,acc,0.6639143730886851,0.008261778456573672,1 +cb,acc,0.39285714285714285,0.0658538889806635,1 +cb,f1,0.28048780487804875,,1 +copa,acc,0.82,0.03861229196653694,0 +hellaswag,acc,0.5388368850826528,0.004974706428434288,0 +hellaswag,acc_norm,0.7166899024098785,0.004496847773250643,0 +piqa,acc,0.7720348204570185,0.009788093832324908,0 +piqa,acc_norm,0.7899891186071817,0.009503353305818581,0 +rte,acc,0.5415162454873647,0.029992535385373314,0 +sciq,acc,0.937,0.007687007876286423,0 +sciq,acc_norm,0.939,0.007572076091557425,0 +storycloze_2016,acc,0.774986638161411,0.009656738215290533,0 +winogrande,acc,0.6416732438831886,0.013476581172567528,0 diff --git a/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_5.json b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..1fa6f1443f68eb11a6abc157425b90f7d13bab57 --- /dev/null +++ b/8b7178b25bopt/evaluation/rankeval/8b7178b25bopt_5.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.338, + "acc_stderr": 0.014965960710224489 + }, + "anli_r2": { + "acc": 0.352, + "acc_stderr": 0.015110404505648673 + }, + "anli_r3": { + "acc": 0.34, + "acc_stderr": 0.013680495725767804 + }, + "cb": { + "acc": 0.39285714285714285, + "acc_stderr": 0.0658538889806635, + "f1": 0.28048780487804875 + }, + "copa": { + "acc": 0.82, + "acc_stderr": 0.03861229196653694 + }, + "hellaswag": { + "acc": 0.5388368850826528, + "acc_stderr": 0.004974706428434288, + "acc_norm": 0.7166899024098785, + "acc_norm_stderr": 0.004496847773250643 + }, + "rte": { + "acc": 0.5415162454873647, + "acc_stderr": 0.029992535385373314 + }, + "winogrande": { + "acc": 0.6416732438831886, + "acc_stderr": 0.013476581172567528 + }, + "storycloze_2016": { + "acc": 0.774986638161411, + "acc_stderr": 0.009656738215290533 + }, + "boolq": { + "acc": 0.6639143730886851, + "acc_stderr": 0.008261778456573672 + }, + "arc_easy": { + "acc": 0.6898148148148148, + "acc_stderr": 0.009491721291998515, + "acc_norm": 0.6851851851851852, + "acc_norm_stderr": 0.009530150430975602 + }, + "arc_challenge": { + "acc": 0.3515358361774744, + "acc_stderr": 0.013952413699600942, + "acc_norm": 0.36006825938566556, + "acc_norm_stderr": 0.014027516814585183 + }, + "sciq": { + "acc": 0.937, + "acc_stderr": 0.007687007876286423, + "acc_norm": 0.939, + "acc_norm_stderr": 0.007572076091557425 + }, + "piqa": { + "acc": 0.7720348204570185, + "acc_stderr": 0.009788093832324908, + "acc_norm": 0.7899891186071817, + "acc_norm_stderr": 0.009503353305818581 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..50b3122a60030005262937d73f5b46629fd0d4b7 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b61ce88300f29ea2b7dd3cb2373ad4122c53034f4da24242700e532d27f123b4 +size 328794135 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..42bf8b42afdccd999e23fba26a72a8e5fb72e4c4 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0039e9b77a3625e6d3124b56eeaf56b51e3d0a2aebfce378cd36d5ee77f7350f +size 328794135 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dcf3618b073f4e1bcdaa9eb7f0bd0a6e6687b6f9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d0a1e0a2aabc0b00221ee7c6cabd834d3484563188fdbca57ee7a13de5fc1fb +size 328794135 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8eeb0f8551d218207cb8af8a270259ef96f4c2c --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab6096ad295da70d101387a7f1ee486dde151cf68d3dbc0f248ddb37cd9d4397 +size 328794135 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2f40fb54962eed417aa5409fef13cd2e3f6e061 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1e8af140e8810726129ca33e52d19ceca34e4b5f22c1e16f21d32b51a64a08e +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7e3da8e4a33c457209a03521fff6ae34c24947a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0bbfea6778b0c6860c6972d940d0c94b1c76ac9ff9723f86049da68adeef8cb +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b229b681c41942beedd44dfa4951ffdac9a7f04 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:288b66e4717d67be76dfd5b775de703cdfd1dd64123abbdadc8b30bbdb9f2b3c +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..76af55f77fe380c5077429c656aeaddc9e124e86 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a98ca2a4cd4a1b131f71e27c7700e7462a6e69936ec2cb02b05a6b1dcf9e5494 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba75321205afc4c3bfc3539ce3d75a3e02ca5a5d --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f36e66f62662d27cee62931fc7b0c14b186afa7b6d864eb6bd61a2e414ea56f +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8cd81a90523c7ffe67f36794e06d7433fcd6b193 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:845406990f93fe88798e2070e06efb9125f7101cc816de605c905257d46f163e +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..104caf5d76b262f6d54e8fe8ede34b9f7ee6cd60 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39fb54f25272edf271b1860dffc4cd0515d369a906dad1d60bc01d676bdae468 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..57e27d09ce7d206c4305f3ddcbc8b5b050254bd2 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b360337a4071efba070ca7121f11838bcc5204d90869658957051e4c0aab2069 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bac14614981be6ad6467ea4d88586e32b2943c8 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e022ab447ac3baacf4cf88a045173f96e0b60b27ab3edfa80774f22dcd16030a +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0854ec45d3e2ff47736993788307156a6e7bf259 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adcadaa5306c3603b582e343c86450c1bc3e12c05e08f6cb5d48a9a6413764ac +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..83b8903630bea3807995c7f873b3301ea17ca012 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c44db7d7931da18c70ff4fddf42aa00954a612bb11b76a061e594f96d12f807f +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..71e822dc3e82f30e36bb894ac0efe32f73e1e6b7 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_0_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f90badc595512ec7d5f7b84da4313dc6578bd13097dcd253379462822d3068 +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2772b1a784164bcd794f1063f528af493a4d0ed --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4158ce341f4a18adfee36c2d66a4869b97f0079a492863475c2912f8bccd8bbd +size 328794338 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d95b2d732790d382c7a1c5a1a69088ebdd601f9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:748051b23d1cad5d3556fad882c01012f7a8254e1034a90dbb5ee6959855f2f7 +size 328794338 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4929de83bdd4c994665b3a07129779a5c33a9044 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432309da9394187440a0aa412cf6a88b95408d2eedd5b691d71f8a7b4c8f4f7b +size 328794338 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f1d7ffabe25f40410c9214989ac361beedfb0e1 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bea4f8d27b32cc53432aa9e1bc9beb4788c0097584edfba2640c7dba4ef90bf9 +size 328794338 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c1409638b31c46afbfdd88982905e02e92b44e0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49eea54d365441f9e6bc348588fb7fc80f54b0d7c0e7cf6a2c0b30c7f1869dfc +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..36800bb7deaee89736ddffdc643d13e5ab83a1f0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f2d3fb481de3b9697dda7928f96bc895eedfa6776bfa847706236512da611d2 +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab59125b1d17ede5658be8888c0f227a2d66f7aa --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74c4a7b55e40ac12ad8d1824ff2490dde26b95ab4d1f727bd647576f62a1dd40 +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cec8e78965e0102af07835f447566af11280170a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:debdd2081c8265ee8000439ab276c1a509298cc473a42712696fed3574a337c1 +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b264c677c37bf079c0e71969f634f20c7788ed63 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:578aed66dfa1f6770dff7685848b301e39aa2b88c1936b7b070fd62c07826ccd +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3f1e9130cf90b61a1c892c6960064b87ecbfb64 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e7d050ea895823654bb731c86e340f5f0898db7868dac88580935e14e9027f8 +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a91fee2c629bae3f8c9902520a4337de9f3c2e7a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc4cce9972838be2e0ab5c428222d8146a22ae9e7c91d415716da79913e4513f +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c53c3d8086b26006799acb490d7ce453c2542fe --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b974bcdde23c2c953134618c051b2d95aa4abf7f787c9b337a79508bf4a53f8a +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ab7eab52eece4693bcf113843d8261634433f9b --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f27850f209d155c9d076a2f858e27ef1f0713c12d123189cb7fd5022587f17 +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a026531f218a1ad4ad49315b18f078ea6def9c0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70f952f1f8d4b05bcbb6e15d40d1a31454af657058ca6e7e1ed90cc7255d3f63 +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..932237a0c819936ce9c4b8fecd46a12138b55c62 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4854423598209f3992a47abdff164c562ca9e965db52bc5a90cb7739db9f2ba3 +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a69f9063e990afc8103afe6796e6a7a14c0fbf7a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_10_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eddbfa5d873134a967e2e54d65bf5e9d0f592fb4cdf742492a62ee6c7d97616c +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..427f73eb34c89819fafb3175bbb0bcd657dc9776 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:979bd8c4775c9a5122f418a0ddf5d878e77ec4613e74171f11e0acb2d22bb532 +size 328794210 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d89f0bd207095d423a4a05266b0ea29e5002e6b --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30a9edc7dbbda3dea32d8ae7eef9ea8e518f58f1010dcb64ddb000592be936e0 +size 328794210 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..018dde13cf0d5ee84e72ee5cc4aa383c7acb229a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3526f7f56106b27b654a935d8eb045af57de4abb2e28fba1471470c6c5e9f548 +size 328794210 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..880bb8d302cbf5154f7bb0dbf4a978d54ffb7556 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:def03a6bc0f422240b6b374881662ea171cc151ce9ef272c1b4a0a4754327898 +size 328794210 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fdf554671861b44b82e6e2fd90d70cf743a6a89 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c31ca0b9dded12f12806eb88996d9ebd47996cb9aaa8b4cf1f6f2879d5c8e82c +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..88e33ea9301dd8da06df0dfa9a3e344657958827 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86a7be47ca9ae6680427d6540c449ecba90a292616d06658f0167575562e1f6e +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b24edd9273addb331d2d15a0f743f1e63a40d4c1 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c8e32e86fd26f6685108476c226435cce2aee3212f069ebe4fa2d4afba54405 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc868c3ce77d330aa0ba826c194307c2a4ea9da8 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6abc33c3399951aa6cb1f1e681887cbffacddda4b9b5a1e3b77f57e0552e156 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..404b7a8589657b4b2e1eef908dd9dd6754c0a936 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bbc8d82f541375ab3bc8535765f4315136849230365a2c473ee886a9709dab8 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..13fa27e0e8c3a40c2be088db61319d850e850b04 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7f4e12ae96528fd8796e13341a34a85d0ba6497f5c717ffec49586f0af16c4 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..02995d0e0a9e4a6d71de22237cde97c719120071 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a26680d19a3ac45ab570e7494d4c4c6ffb10b46af96ad6d995d7f7f3ec7ec48f +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6185bd23aec3df3a4b17863d05301a670e158a97 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c2d1e9ea08e4e13f921c53b04377755e04fc72a84c31175dbe69cc2ea728681 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..72507b8a133375e3610ddfaf598d3b640ef161c9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d82494197c6f48a3852ca42fb8ee72e6f8a0a8f02f4f5b3b95252fdb931dd1d +size 328799586 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c05a64f7497f8473604b03e5782b1784ab1b8957 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:451a3e94d0ff0f8cf19206a5f2b1cd7d0fe298945b8165dfa93ba0eb430319ba +size 328799586 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce2db85346d019f2d8777a469122b27c02d8be68 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec28db51bdccf9d3868bb5a2a527a8fdb9f31a4e208ab4a2d540f69eed0b39e8 +size 328799586 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5c74f70931770c4a239d80fccdb14d75306afdb --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_11_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1182ae67033b031168f58fbac8a6f727aa991f28f36e86d7537bd7ea90a61ffa +size 328799586 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1747c0d7e2fb206a34b919f641bbcf8f1b97f4b7 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5325fd2844b340a3116320ec316caf61a619f437d63ff625b10acadf6053cf9f +size 328794210 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a6fcb56d2d715b8acb89416465dd38108e1f77d --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46a403b791c26ab4097b0959d6d255fe08a2f025d3c7d2d5d81fa1f101fb1f62 +size 328794210 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6bdc75352cbd776fb1ee585c8d80ab3e0a3a785b --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:656bbdbe83f5336d79c0379e0b71f786d74bec32b16a0839c07ec818a8f468ea +size 328794210 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..12a828d037e6433616394dfd6957197161033c47 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8102e44768d06f0204ed484acef6b47a7982ee7297567ae7cd6f2a5eeea1331 +size 328794210 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca3f1f4cead8ad57bbb92ffac994a34db1d3eda6 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60a6e796b0ab4b3f54218c76c8d91c2921a428915b8b799edb75cc1eab4a3c8e +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc14b52f5f686a3f8633a811c844304631ec75ee --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3281b0ec3c0d70dbf2d5cfad9dc487e78dfea546184c32f31a9acd0b736e65 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..90aec09ee7a613ce5b0ba4fd55805ad28ebf150f --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dd743b66a1800f1932b1251f58b860c87ae33c1a8f01d2fad28a80da0c04f5f +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..365ff9516fecac581ac3d24ace4fd7858c1d0855 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff8b92f9a41861b06080e3cc02edbc9fc3bdfebb567f5606afae0616433fec1c +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..852c9230ae4cfe9cdf1ed7f31e4133d1bfcacd84 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aeaf4fb4d51f6d49a76bbfa437254775ee5422deaae90ef92238909be14843f +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f949344e6c0506835c344b89f59fe2f700e556c6 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b96f508ad652776df8760ee951fc35e1538ba912be6201de1c1f29eca7e295 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..86da07d85ba4ff4dda2bb8da2f22605bf6fb525a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bead9cd75a3934953b4fd53dfdf7744562fb61646b6faae27d18ed384d8f4c3 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f4af84d527568252c437afaedac00def96672d1 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a1a334ca8efb3073b172b3ae3b6b4090189635d92ec2a065b9f7fdc23cb636 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0371717d9a942872e038b82a5a5926d09c2ed16f --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2bb31927398ef91bfb9451fe5225a9e8e7e077ab24df05a3c81bc22280d599a +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..85db1091aaa354fa3a368f0965b78c6240f106dc --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dcfac92b213d40cdf718b8aa5bdd902c75960c7263e2dee2e1a6c76bd166362 +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22f815d597e5c127ca75a573bd706b8029fa80bd --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3692e2fa444318d1437d4b9463af3b960e3ba93095139810eea256400d131fa +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e2ccd4a579e92df0441f0e2c162ede11a484b55 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_12_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c714deddec49507d77191fbc8e9eed857d316ec8cc204c573903f4361eaf9df8 +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..81db05549675dbf8e12a7d87ac8896f3a16d32b0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89d078b0d10f73fb1c83e1672158b68adf2673d91aa3aef32482858e2bd70c30 +size 328794338 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..17a0574f2c25f7d605c8f9abe55896a9e837198d --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:436cd5c9fe64fee9bd949586342314952a07812187ba8848024a71b4ee6e1054 +size 328794338 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc1c34976fe6e9571e36ac7f5cffe1ec5449bb20 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b7207f755c4303ce9f48a02fc91605be9412bf0c3ec4e2faec591060031e035 +size 328794338 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..735a2276abbec4b5c8cf0535922971cdd654cb09 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4d47f4f72b20435eebcb75fae5b0577700972f4f9ecfb4e75c3938de5f1f3ef +size 328794338 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6b25f4ee083831d1c0baef8efb03f9bbc40c469 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8deef2ab3aff3edf54612cc40fbc143f5f85e4c82fb521ccfcb9e8cb023dd8e +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..04d89f058ebaf55f8f687b498513ed50c647d44b --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30f96431fbfcc391399a0b972f7307468c77ceb27fa5d9af31f78674fb2a4442 +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9415156de7d856a98a5ceca7fbd22a5f46011397 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d966732415549f4a940a278c1db57ad5d7eae3eda229a4adf23afd28ad30dc14 +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..98d3043dba692ace5131b7877a367f15e5e86d75 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f64c8e4e915f7df1ce5965479fd7a91b867d19c4a413256eaec8ecb0393eaad +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5551447c69161b5b695d2a8b7c33fa62a557ee35 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1638f52ec8426488aac2a6bf5fda9adb2d8eab1f756e806aa9402843fbfaf5f5 +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82d667be50fb7ece342df54fb092ab11705aef4 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d18f0480beb1ad48a5e93cc0ea3222adbcbbbb6494a31f36d2d78dae18f94e17 +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d485a53ff26da0f5298301c118be28ff8e19a17 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0de1ff364bb222639e6cb0c3bdacf24903b01fbd27742464e7d4a14bcb9b31ed +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..32217435cf3953811c74458c8dd400349c8f9957 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe9cd0c7c2040ae4665a2014e3e6ce6569b427cd835e9a3aef491daca72a66d2 +size 289227106 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf9af53e50017ec2dbc00afcc970e43f8c1667d9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:625b0326bb3560f79a0984238d5400e758ab6970ec464a95d91a1ca5dcfe2fca +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0054d19991c7d73ce1bf4745f09bb242d229b13 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a15ddbafe49cdde3c8fac778f802d276372454154d4f43564af8840f806bebdf +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf99d60cc1b6b0084c693b623878eb63c5417c3e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc34140ac75c347fa5f8030e6b6996332097ec0fc039cafda09cc05a85f7de03 +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..573c1b993e3f4f68fbccd913ac6d44d733186ef1 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_13_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94bc38b88cdf13b2870ad56fb4910389421584d1c9893eb548354c62c8a44e9d +size 328799650 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6444570fbe964190c22acd7a8e8ca1b4caa91c40 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9cae22ebf64a9021ee87c2da51b8aa04976129061f80f545450a85074ad08f7 +size 328794274 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3741c3da102dc6d400c134a6504810ade1072440 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c1868239bb884fd075b18a9eebdad0397c2dae4a1e25c1556fc9634599f6b09 +size 328794274 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1193dffefd7f0459c739626115f6e9b521aec504 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ddea3a05d446eecd88c724806739f1f7bd5372011c6c84c9209e4ecbd7de596 +size 328794274 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0368da7a966fa0f74780dd8bda5103a3e97f94d5 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f14382631a85b9e3b0804fde5332837b869038c46f56bed7926253143dfb326 +size 328794274 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..28d6499fbea5d95ffaaf1d89c426a24a4c4e3369 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81d2a08d003b9d34e3b25d8577ca93b3403c32e026ff80abdf6e96af625248a5 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f8bdaa3faa7c515182be26b86947c0201b68d8c --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b0494083d0a2661c3c51edacb979efc8e3058d996106d1b232f5f6439cd0a7 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..92e69d37ffc74bc72134f1fe35258bd92e732e7d --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:300e099895d3274d2676c790db6dd74dbad175e2653e856eba541f8b104dc96c +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d3ddb79bef24b9d593e3a14286e58bc8944739c --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eface9cf8e958f7c91233a3d5b62eef67c8595ed0e5c4adfeb3f55c9daf3535 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7b70d4178fdaf4c4c37ca71add33d17823aaf8d --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3fdfb97166ba87565b620af8ba333f274efde4c2b5b4776467de4fce667948d +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..db69762c7eac0f5a127efc5646203005caa288c2 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:905aa7a5158c6e21060455e8444785c30720b89b35f15b48d4a389102fa2cb4a +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd405880c6cded61ea0c6845fd348ed78a8906de --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e778774ce5d0146dd059be291b83f62382e7ccc8e917e2ea7cb0ba8e6c7b46d4 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9467f317e8b86a01336428d737b5619dd3607416 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98d7dd15d1d77e3076665a8bbc3d90b84041bdd0c5e4e01ca409bcd67c1544f6 +size 289226914 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0183c9e60439939ab6c05e3a17a8fdb67e93549a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9427254b29e80cfb99981d62f51e3314b8ba14460b38ce91cc4d9a3903a9879f +size 328799714 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..09255b5236d2922b2e97e459c5dcddbd7dfd6d6e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27ab23077e0cb34d1366276edf8f98828ba89c6ce8a068fdd5b23feee70015f3 +size 328799714 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..510d22acdc4dfe0427f92475676508c27dbbdf12 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08741f3e9fd0f67f077ad828b912f7b3a73d568b0795423a2c89527791c9a9e0 +size 328799714 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e077b39a375d0ced6834d6fc19ce3865bf6331d --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_14_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5d3d18f4e65b4651f49101a891d8c4682f88596439774c41b994a24261a1c9 +size 328799714 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..18b16207085f7d74b46cd1869b26f591eaaaf9ad --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7adf8b886cdbc4e20ab8fcd7cf249eae7737524ad0d90a12942f80567a57aa43 +size 328794146 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dce11d2369a5250ad9a322f21b37679c0f2a58a6 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:560d88a6f534e41ab60d3488168b45e3ce1192929a9d850a94dd780b2e81d185 +size 328794146 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a30328d579d6851fcede419e623e6f0ab6fb19b --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e2a962ce2aa68532f2e617c29f66397e10fc9264792caf9ebaf49a750131ab +size 328794146 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..66860352264a749e3a227f1d08eb60dc36800b16 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e73c28591c17272c05068a1e809c592ec1427e76c20b1331dec4409d063b654f +size 328794146 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0eb9955a3f2674d3b13e937344eeb477b847d523 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:256d7ebb25022bfde2d99a30b50f8869c2b251931e6ba3732eb8260e23f74c01 +size 289226722 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..49ceecf9dcacbcbc33c672b419839e6047e24844 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27aebf9f5b3f67cb544fcfd53002a84783bd707b6a47c582406ba841419f9f9c +size 289226722 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1aa4a2008c2c114ce62d92b1efb70912bc55aebd --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9660a04848223a13003f898e1ab9d49d0381f8557d8f3eaa13d8fe49a26bcee9 +size 289226722 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..842dc8f7dde2ef54cea1339c13a41ea38232a5c5 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa8027b909273f043fc1cb954bf4446833287d89ef62833f6e13f5c3d98aa49 +size 289226722 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..33fd138d7c2f4f40c12423b1b4caa7f1fbc44278 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a154a489807f479f1cb26ccdefa3df798580b0be8669003a43ac0a28ba11f075 +size 289226722 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..35d493a155e74df7176de1dbe79d9eeb634e62d5 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3471c2a80c49c7e1b6301204099d258469f24c421029a21f6c499abdf7f4f27b +size 289226722 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..25e49ca4bd286b33e8400e0b26c933dd3b1bf62a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa9eaaf5f776542146abb40c246b8a232cde9d21e39d6f25b6ae86bb50538782 +size 289226722 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..01656ef1d56236feb0eebd5e4f4c060f270df512 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23fd0322c1b82a3738d1214c0056a6fb768e5d2fdbbb2fd4ef552293d597aeff +size 289226722 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0759dd3ca670f2adbc2fa515811932d64aa88bc6 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e28292ff90e1f3531e5cbaf81e18b0470ee1445c0e875da6d933cf50f4070b +size 328799522 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..db3b868341d259dbed8284cadd2717e1858d839d --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebb776b4c56ec19c1ef16ac1c60223631f092812bae7cbb269823dddcf2c08f6 +size 328799522 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad2f675eb7fbf084159940eaaff7322e3531195a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebc284b8e7447d747407198ae7482d66d03c31117faaacd999fa45731b7e2d8f +size 328799522 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8dae8e05f0b2d079d6865631de4cb13bacb97ea --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_15_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19ef7864993ad5a90abea6c8873edb21c47c655e418a3ae6532cf4225806d94a +size 328799522 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f92d27c7a9c709fec3c92c481f4e138a82f2bebb --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88b163b6ba364457e4ef4bb5bd71548e5561c94a955c0978d3cf5d84362712cb +size 328794135 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a09c6c90f36e91f0ac0b0dbb70e84dfe1abd97a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e58b2369e6318d4d8fa52280770b4cd4375570a734b915f383be6d7e59ae3f +size 328794135 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..178d4c81db5cb3740012d0b2474ab1bfaa204a7c --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bca530a70ba43a1e13eb51d9206d7c57bdc50a39ffa732142a9216a6dccc565 +size 328794135 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ca39194d1a87d0d81b86a372c421ae3b62e0e61 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a98e21054c8da326e48c72ae2523385c2e013fbfa81ab25f193a5385c74656 +size 328794135 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f900f0405c833d7f5d41d893373ee8cfd27892a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eee203f04d830cfaa31c4385c17be0e76c0b693bf141c84f4aad744eb07a08fc +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3262e6b55ba4e0ad6f0774cfc854df3d37751d41 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82eb5b6a1573038e627dcb7d58d1c449cf063c81455e268e23561f812d27fea +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb2411abea42bab92088aaefbadfc0161735189e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c9a12a438e4f7d8c3ef2b0a9864c36cd42a6a9d1c66bdb0a7a537877c6d6c6d +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e1dab614eac487c203f646a4318cca37910c4f4 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c82955769a9795102a1289a25bfff9287f6df6d01079918f6843b64f62295bd4 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4440c2b3e2392622389634ccad490f1a2e999f7c --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:756cd7ff56d8615679bbef97366ded4c7b8002bc03d3bb2445dc7757225a7533 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..12b8127ee797eaa0572ad665eb9a7cc0b6aeb3ae --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54cd4a131c2510a6c46a4e06811de15c956fd56929f471be81e72ef5e27ffa62 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8612a40495902629bd183e59ddbb3cd7dcf8700e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c5b5dbb403cf9b281337cd3e4426b18a193dafc848e6f28f5b2a2462accab6 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7922a55de52f78fbef38798f83f2f3b7cc4aee4c --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa970bc29d61286dcfe463a4d789c6b108b65dc3933d85964d705b82c8b40776 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d54f9fe9939ddc409cd87850ebc89f84b1f58249 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:501e6aa215fd78154c87822790ee4fe87d0debb19e01be10bfa27c4179efebe3 +size 328799639 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..33c233347f813d164ad5056153ce0d7f490f0dc9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f42a3b3f3ada50f6f6410f864c5d8e6ce5be39e806fc711220dd1a94a085aaa0 +size 328799639 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb96900e54e8e1db7998247c9cd635cb81ac031e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726e78db7f3d93f48628f380292603f1f4f1f2b5675de982446d99cdf3fe1e0a +size 328799639 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cb9cd44e40f740dd35abcb533c44499b1a9bbff --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_1_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca85dac77edd0e3149869274027c6834e7e128542df3a062fa5976e8e14b342 +size 328799639 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4c6a56e5e494d86bbfdbbac7d8892675216c0d8 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68adf5bff3559ad04f93e865682748f9f001ebfbebc95910397342a225688a6 +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..90abaeeebe3aada6fd44e61795e5d0909a1f2ad0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ab17f1350a1fb449f953c7bc902757129f8ccb9a275b2ed76df0cb3bf67fd3 +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd7fc1ae1515c20863e115e7be2c1be800ae262d --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64cdc1179a5de628f76868aba327cd40d671423d7a186ddd9a77b833e40c49b7 +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dfa9f7eb058a189a545f3ca080c3b1babdcce98f --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c29bd4f5ec017f12e396539e7e27cf0e1c069dad7eeaab09396a7e7b57efb72 +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..743d052e6706f3fd30d405a942cf51cfaaf01f0e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7952ff7a5a0a23f79ec9b7c20636e11e783e9b9b0141e7acc61f5e2ac854a179 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4fbe861503640f107c6a1265a1bc9fa7af5cc88 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:688d230a48372a9df87480521fe7f7cd694cb43f77020402658943d21fe6fa90 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2cc5d50a78bc22e666c40e19f4a1e7b90253ef4b --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:884af955b36e8cc75021f53db91c8e1f935619c556389d2491f69e63e37ae31e +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f7b53c44ebfac7b514cd505b884e0bbae51598b --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb24ad38669bc4fb47798003682211e1d9a57013901b1b8ed0fa7a6084b8eb5c +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef87d22df82cdd95dafcd5a5a51703afd9010050 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d81a28bd83da2c1971e2f7292c8684bfa8f4a37b2a8174bc31609a91704d2f +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed0dbdcd212cea25d3e7700995bf9e7a437cda76 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d352b8864021203291062ab0381634bcd415583fe64a8a32f1b18abbe0005385 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4cd13396bcbe5aa142280233d6e1371147b3e2f --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9faf2302a93a1b1f431a7bcd88773f2a28a8a9f13c31fa4d6568777e57930d0 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cbc74bd28ee4729482b55fd17cf0f093a87498f --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:549543797e2a157cc661e1bd56b7d0e89a9c6a655ca7d6bf59e719719ebb16a4 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..02a92327212e5a3e448ee74f4971a784cc58f341 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c4af5f389f210df6f9a9205a5b6ea816acc1ea0039d4043ef7cb8f0dab4aa25 +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..73b9eeba7569f7ca4c68c7180a4ded463608bf70 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34c4a718b0bb532a00d8be766a6cedd55807eb450e5b0e86f1fd9f929fe8e665 +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4de814b669bafa75c453ea683c55c6e24780bfd --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d51e0ce767b1f89344a74b5b8efba07e89c6ddb4b12f24a2f0cc53fe69b28a34 +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a58b1b6b1cef85722316c597443d9e626bd3eb90 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_2_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2234ed6d0f6f9af316c94e6bf3efb4a0c57a9f2393d0fb487fc28ca7c0ded19a +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0ca60e27c66394741fc841e5b580683ebd3b96b --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4beb876aefc9d983765aeaf6dd0497c6a23c8a9b6440da1aea2dda04d017ff1 +size 328794327 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ea1c2421141ad3e9178e3fce595b67265bc2c35 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e36844da87464a10c720013f98a51dc2f1c79058326538d13d91493160a758a9 +size 328794327 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..15626ed002eaef38964daada925d4f7993e153ee --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ab88e7f3c57857963b332eea17a9252d251ce58119d18c81c4bfbbf5409f31 +size 328794327 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..33fb2d29f7d0422a4955bd940861508e56ff388e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76603745b16ad7a57d0a5f8e6d32df665880723604b29741f94b2d6d11b75eea +size 328794327 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c202fd380911640b66f8bdf02f9a4c54cfed5fbb --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25f2c72658aca669a86a2ace82c99114ed34010fce59fd46e44fa7bcf7912649 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5e0ce8f83fa23124c53faeab82a7490c9b59f10 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afed776b614800446f0b2427adbb3e794ad64e75bf091b1a8bc09865f92f4795 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9441a723902c74de41190a40913dd0d036059346 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7e9116a03a5c03bde6a085d2a32e8d71b4802a50c32b1db04010d569730c592 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..455be8d4eba9e2217b27d8b57ee78a669b737446 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b3cd8a953146c00abbb79a7fc9d1ad0c31e24f868d86571eb40c219b3bcb558 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e28196db91a3f6ff1bd88d4f418eccf200acbc21 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cfe031dfa959c4aabc07e7d4ff443c3cd496813761ad64581d5172619cca7b8 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3145b5eb0c4c11123b64bbfdbc21caa5a6fbae6d --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18713e53e63aaa4c02da36d47409d81c474f1264223f2d328efcafac071ce485 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d633250c220336e2267e6a4e9b8341e61c62f48a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc0c98967e9b93f43d51d82dbac42a5df6be66a450603284b1231118d32d2b5 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..823ecbf9577bbbb9c6c038979bd5f3b7838cbaf0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6942b931d91899fe767debc8021233caac673f41f95f868750c5ec7eccfa4c3 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1817bef8403512a03e93b93ae059e1d4adfff4b9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd94438b30c0ce96c6a654d6a5defb78786e908f99bf18994aa2442065219f6a +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fbe5d4509c13de03e359b203d375ed25970ace1 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d33da48a8a28629289e07765e0fa88e7fc224b3365697fd94f6448465eaba52 +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d087b49398d34be05e6715ce166324da4075c879 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1690ed01047574c5c1994908e4b903d8545946ca46e6b96549a150dc288094f +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fba7d9fbd1e1ed44cfe0bc7e4d0bea25e9f8e02 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_3_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0767676ee00bceab9d66c56621b182ff1cfe0518c9e2cadf9a27b9ce7a921de +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..70735e22131dd37498578b9ddedbf0f9bab185da --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b1fc3aa2dc64a0e06dadf5ebbc6bc22384cbb13f52f79914f18fb03cfa32e66 +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc0ce49638ed4eb85f4e9fb1039319ecc226b2ae --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7df46fea75925f8eaf430ed06015c8f63a00ef28e6ab0ddac8013387549bdd61 +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca7f056c639113d8c1b8eb0119f20846bb412a83 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bce7775e1d04df70c34b08d09e02648ca9ef0d40c4aa7749791da3110eb837b +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1412375cf78df514cfc738227cb3e6a96f897729 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38fc9c642106ebb18ef296086a97ac5b442260d2ff07c1be33143d30b06bb299 +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f7ed83261dc52f22a5800c9984aa9b2b498aa98 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9105afaf6c84c6ba587a146707d0d26f3844f622a9affae66b7c0a51a91b7c19 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d43f88c69ae8942357e5a8f91de272f44306868 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2164f5e0a54fbfa93433be11185c30865bd1a24e56a98f9595401600f2319b09 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6220f084556028be97600f96d204d7ba9f6df711 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d212566a87a3e3e07629a521c8cb6c0c882f1211dc15b5a9633ab0c13c85903c +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c2ad8b61bc402555bfdd8e20e0458032cd8c35c --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:722214a1bf7a19ff0a19fbf60e5b60ca8515503ffbae9afc7dfabe001b0defc9 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e772113d5449192013d9182b34bfadac3ecac89 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce5a9539ea22ef5c0ff1e65a4ee5fcf08a611519218a07a64c9c1eb7eb424ff +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2d607a986bb93a6aeee6f015e528ef0fde756d0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d96e2ed21fbd8330d4bca5918f131723a06ca351d215d93871e1d0482a732645 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f702eb9870302839da660ca8285e40179b73d2b --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e47e9199f9525390f24f53ce1228bc446466da19929432d2dfb28927e0c36d +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..00879808d1b4b3cae7148be0233ed2f3958ebc5e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:735dfba9b5fcf5bfa2412d8c809ce8617815ec9a18b1bce0e5c439178b05cb94 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..609574a7765ece3710b78b8121ceb80c8a454ed6 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82ba256d68159376ae8271772556cddf937cb09437f93daca0ecbe6973f5697 +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..54ac7b186a6b7e372446f84df8aee0139a03270a --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f50187db57501cf57c1fab00f0e4db21c57fb6a7bf838435158cbade8638262 +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3fafc1ad60ccb0834db66ddeffd8cdfbe0720e8e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e1c116ed09a6021ed82eff3cc6880ef0d729c6f69f2a1af29887012880a08d +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1643cd38189f2cb114719335ca48cd1f5c371287 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_4_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c460d95d783ece903ffa26dbe438d4cd2a1706ebf355f4da6ec93ddb12a6228 +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec599237137e0b331b10405b40b4145b140fc65d --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a0bc1fb02949a2b6e03f4f43aa2db5c70a13fe3cee85a7665def58f2dd38bf +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..511cdca3bfcfbd18d38d7b2d015fa15b2f021841 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b307945f0989a44800b9fdd9f0e3814a67648250d89b5a0e42191d03a6945a +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2fac2b2dd483d46a20416ead1d5a4af19d5dc66 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c917d4e8179fa027c984112a4c71eaaad1197bb36f3bc277f6727e91fefa427 +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c409a9e78bc7f8bd61b9db51a97c15d48fe7094 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:885fe78b2bf4849153a85004303e19d2a189da279008ec998bdcea1879615785 +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1432752a5faea5fcb48fca9634b824958dddb29 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe47695ab92c0f37e8c4750f8e4284d7658d5d687ec176953b0625dad25556d6 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f278c78dfe3cf2b41635c61577a5fb9e0e5677e2 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c14a58c7426434f42558927c46c789a81813cce879efe23379883817d6f17ce1 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d0599ae78e343d17e4c64bb6cd9a96205d58824 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c44428260c87c378dcdba77fb6f93dfab6215c943ac290d59252a3c2da5809 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0a5c4568657cf3c5f9ae73f287cc1954966c13c --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47d7c70384c6aac92f1b0a3e17ebdc0593efa727736c8956ec8b2169d2366798 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b672524b3029de57ac30feba6a0074be712fabb --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb68118fd8fbce27d9005bebde15520f6acbd8824b48107aa8922e5ba31eb937 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f84a5f9f2276ad56196cc34058b91dd5be87523 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95340c76b7382bef1634fe0c65911fb45f90226b545517231f94e941e757ae1 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a33b182d96ab099fcf5c5756e341aa02446693ed --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dffa3a8855e43604fa04bd9f17721b9e7be95454f87025dc6d53bc88afa29821 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc547811daeb45adf8e8f5834c01d562217113df --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62384d908f209f87eaa8585ad7a1900353e8cfde1af7b82d689a4fcb28d40958 +size 289226967 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..65835b7937d9e72ba0fdf9923df65ac64389f0df --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10eb19fc06303889a170dc09a53add47e7ba18452c7b766e59c8ad066205dbc7 +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7eb2104cc63b127867f582e1a9d1ab2a15ccc2ad --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f63b2342c664d0ffb1a3758de80968c724b67955a898bd63ef6a6b72328713a +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2daca35a33f474a9cf599e14b467c1291e2ae6a4 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70506aa23dc1d00a7cb00d1dc42bb8132f06c348a554fc343dee5650dd8d44a7 +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..449cf8558d9f3ff26052c2aef8618c057e54dea8 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_5_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be60c26d940a76cba33b4d09f338633605146c90f09e0622f248d1238712ad68 +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..17d120eb6dc102087b18ea2f72fd2dc26854f912 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a14e9acdbbf6b0001acc86a35890e23657207d180fbe0f2197a8ebc7c1c8fb3 +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e49fdb183a5c0ecd2546cd7016919843bd99d27 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d870c11e3137b0589c6a3080f6f21a7fe477aee11b2f66a310cefc20a88971a5 +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..edc860ce37f4f1da7ea055db753e436cfb4d4f6f --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c16e2f2ce67ea8c2683a2b1890fda7b9ee8ae17096bda3904a192f93feb02458 +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f45e94438bdfef6db2e40bb28173c2d718c2be6c --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:557391022498449bfbdeee2a07476cd32a345d4e4917ae0cf1069ceee3683d12 +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cd7f00aa988cb24faad5fe58b1d54854fc86251 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b021871ac127ceb6ed9dca68af8557d1bd1529d996aebd3529d616a18eb11047 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5c2b8063b9fa8e9a46ddcc51ca00f1f7dfbe0ff --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3f8fdbed3051e101772abbaca245827c8e6e457d371a5ea27bf952245c2d8de +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1728a6d842a5c5311deb8304681e2fc76c6e3777 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00696e19d14d7f77867fa6f1aaf4db755461583cd62347a03247621e513324c +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0eb58de100736fb37fecc0f6decf835ac9629b22 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1638cfc94ce3346e6634ec22ea21bb0e41736cde4b766803a9f8d3938201368 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..50ac67a603066e875a0e1e61e705c86118069155 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4988dfe7e61beca35a9aca2b967d31a35609fb30e541b8d69759a5d7843f192 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..202e580c6fd432efc80f866a1ad24f85a28b8220 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7da8dee6a4ee47c1a31808fbbfbf9fb7a4e87dfe1e5a4b8c35f2f33e8e1a99a +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..91671f83408651f0dcb2e08ea69f5b2738da4645 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:459f3fc49899d50d845b851f81f4f3abecb44739a356240e124f1fd3f812d2a8 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..31a539c6c1315e84b37812f2f65a11d0548d05bf --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:486714ae65c4d1c9d3cbace79a1f672e12d5ad5e9f4484eb6b457a50a2490a89 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8536c34885b56b10877f214f224e965f85c658e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7b05d151cab6c48239963a505c065456d22f3b2c4e49ec38e6cb1d955ac48e +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..63a64d18e106bdee02b2fbf528c56030deebfb64 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6697dfb64935853f1144e84878360a816b8c46e6a85d953da74ef10611690b +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..67db3a9f13ce45252d41cb0f2bb6536bac496e86 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30df3cc066ef31296741d16aebcf9796914c3e5e095162d3f0732fce02f20491 +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..59877ec0b1f570123e6d32d84f866816a3599310 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_6_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9206958b675d9e778a343e38c579b671aec76f1a00254d77f962bcf88b0b4a95 +size 328799703 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e932b61b35f49cdfe0558dd0be91d6710133d07 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:477f94d23f552a67a95f5f4b8e748f188cceec0b20876768609aa260d8af1026 +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f99adf9efc282bf42b2208bec4d945e3f6568c93 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b8d95a79d4c0b42c1f7ed295bb0a7d50ca80908766ad79c6af4773e8a996fa +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..114b77539472a98581dc1cabc0dffd75cbcf6007 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:255e68b995410f5b85f3d8b2cc18da60b508e41040a51bed2ddc321d2ab8ed70 +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d3cf5054fd38ed7251fb5f361386d1137265eb5 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c085e9f173c485f5566eecc5ee412879321699d4d4891b6efd2488f1e572cce8 +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f32636a639c51e5035ec12c1c03b99dfd90a9b13 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d1daf6603e64a82aca038875015750bca425b0ef90cdc3ab65902d180160dda +size 289227031 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fccfde95e4c6928184804565c33ccf3760d62819 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd071b6d3821671f8e4297403ac15afd9e169ae9722ed258dbd7a88a1084efe +size 289227031 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ef38e15c76c74e5d1f67dd86de0fc3a16ef39f4 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:177419e0053fd19b635430f7a8b9d586481e883b2c7a65531a909b57534d9c20 +size 289227031 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8be15868a1c3a5f4765cb80dad7d3262901d85c0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19a316fb227961068e2de8caa8e93ae381f289caae96885684f9c78c998d862 +size 289227031 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fcce05f950bc75125f45234d9efe3e203afe73f --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0358abfc5042f2c2ca449564560be6b8b66a6bf40caa4bb91ab03b055d63025e +size 289227031 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f70b1e9da05e6fe79e3eac064cbecc4995e440f --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aede848ccb6c329403263b77236e340532787867296cde9e772a0b586bb3710c +size 289227031 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..92cc8bfbadacdaffdd501eca0daf299aec5e71bf --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b44150ce6ac70ed3a6fea81dd7d435757c45dc4d03bc200bd059ab97299c8605 +size 289227031 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a21a25d4d57cda996e9b404ef7371075756e23df --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e212715109d38d60e060f4fa76c8755d07efdeef8791bacaefb39462e12d3b4 +size 289227031 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..70fa869da7180e70ae53e0f462e3625d440248e2 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b58cd3e422b70acf942e4f27a7d77a5c43d8cdb8da4e08d760db623a731e9505 +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4987021a2cd16e392e0b09aad7b695c07ae3fc2 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68440c0a0a9ca2a47ae390c202a19848acf8ae44e7c62c29123da47137e4ba0f +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e942a27b3fd35510c991afce7f75e10db347c8ba --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f8179a0a22fc0de8e64d51e0cb4df5557879c6a671050c3e69c9a4b3da38df7 +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..72e507675fbc02e9b86fdaa5f9031fe353cbac4e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_7_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29155872734c3fdf0471e37a59a2e34338db83fc3bde38af55ae628432742094 +size 328799575 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..94a01439b1bb3171ef78e17f3240179ea046bf01 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b788e2e269955cbb9c51048ed852de8d7c98d311be3d0c6fd4a50a2108b15cb +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a2d386ddd554885209f3607aea37b807ca26749 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bd59d460bd6b1330a331827352d0e6ae326e8787e6654f891ebad471ac8cabb +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1daa5acccfd0b2a8d7d3bd9a8e0d8aa9ab582c8c --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c466380af8b335013da0f17f5f5f966c8ef3d4231b036b0d2786bfe9f455ef8 +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..015cef0eac4c3ef0441c100a586be8b62f53d8a3 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:359824454c939b679d78b3df00abe174b52a4dcba4f407b72e04970a3d4be02a +size 328794263 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..98d40bc8dfa0f6dc304dc02716397c3329aec8bd --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:583ca29c8b31d56a35716fdc654b7dcd2d92cd034353485d3450962e133ce4dd +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d72eba80a5a77f24fc4472f920a16e3dac4414e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f2380a22956051569354a2dd5db97d248e176c4f8f47977d9fbe9617e93ae5 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2356b4ccd19fd63ac8940e73c7d40912eaad224f --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc1a3d40c5e70c9c97fc8e3a56e27fb14444f4de272f6e2f5e852e7fb7bb609 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e04bec664fd2db8262ea7239f6028e2304daf638 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5affb0f23d28e5884b7e90ba43a2ba99c5bb842a4a95c6622d6f3728d5ac28a +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..706420fb203ebb861376fd7351774a3e2a6eec8e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986ec6218391a3c28b3b3d0a453e50cf550ef8c9916f64247f7e3bbac0b6e1ef +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0107d1203564218d07697a632c50c20cf099c8ea --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af56df8530f99690c8e822c32e67602f554001a3eec3402818740841fd13f4a +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..49ca151f6d561b8b7fb59e920d3f7c47cc4b8523 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01945f9747c39a8ce1cfdf8fda2fc8fee356da27be38ab44eee59e53a9e1ebda +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..60d3cf2dadba3796bc795c6eac307d345eb7add0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baf1ba3d248ccd735a2e4c27ff713975d8453f5a572d5b22266af3dab6ab9822 +size 289226903 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..03ef3919ce042d356c746f7764b1a2709109bfa3 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6198ae276487160bcd0acfbf546f26b302cb31d5f954964626fe4e74aa208861 +size 328799639 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ce150de17fb4819b5c6ce6be9a8a9b8920d972e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cbb992e0606026ac75ba8059e039f6f346fa8a7854c05fd54876bfe08c40218 +size 328799639 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a60e8f5be2e07f60439ba7eefb90b19dcc30f6d3 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7567c381320ed8cd9113b5064dd97ceccb9ddc7baafb053ee38f06cb0c91e2c +size 328799639 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bea7d472663305273abf93b7ad1dbd8cbde9e45 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_8_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ed6718513d98fe42a1999e4035e6fb51ea280e1f2d61b40c4763582697e8cc3 +size 328799639 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..145222d5c9b5c7b4e0e689041cefb3c50f14fe7e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:335126c1097fc7d40c96b935c892add1091c9aad8fb2ec94f1d28a4be3b98fc0 +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_01_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_01_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a28250249e89ae609cbd58881b87c1f78b7759df --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_01_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a76a546eac5cb0915b32f15cf28b0596cb6881a7fabc1b45aa5565137988dc1 +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_02_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_02_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3860a76fcdb5f84a4978c19a9c299ded462eae07 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_02_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7187b28b393bd5a04c84e7565c2daa372920a4b3dd136d9ea7d6053c59bf93d +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_03_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_03_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..373c6c62a51e5e8b546190540bd555114914aa69 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_03_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e45000a9ed12cca233df439ba21d2255bd13f1fdec38aa8023f16b6921ac5c1 +size 328794199 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_04_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_04_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a590539bc68d329ecd0c3db9a61a359d237f137d --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_04_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff28eb6a916550f6445098e20a6fc760294278aeecbfee566a47a0dc64c77970 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_05_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_05_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c12ce9b95ddfefb4553240b0da80f58f9d68c64c --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_05_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6657995a9c3801a9b49f999e97f44df96c2f1042c12a7d1769d805bee9bd3783 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_06_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_06_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6445abcb72790380ac46adf27cfd040460fa78f1 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_06_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6815de3609f7437b6f3eeef2cd83982991c1482f67317c80d1e8c176448c6c3 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_07_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_07_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f8f654ce0730e587cadd895222ba3f774a0a6e1 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_07_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7663dc7cf61b09ce2864d7f398c64b5ab287e2e9c60b90d982b2990945ab9514 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_08_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_08_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0436ab1c4402331054359888ac952f2868e88ae --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_08_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7419c6906a7aaecd29ab72f0122d51358208c6341eefaad03d815401d7ffd72 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_09_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_09_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a2772ca45b011ba8a3c637bba8e0730423b44e9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_09_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1154af2007c13664d154fd4c113cc1403eca36a19e22cad4bbaeb3e530c84158 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_10_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_10_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b268ccb5c8e75684df709ebcde01f907ae896761 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_10_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddfb8d6de8e5c156583b7330ce84e6f048e7e975807103f9814ae39ed90bd82b +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_11_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_11_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ece379a65063983fb127acc03e65d29f3ee9b2e --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_11_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b00a182993a231696cc7410a95605621ad3ddb151114bfe2120ff79b1e54cd7 +size 289226839 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_12_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_12_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac3d2d5809f395d45896c69196f0634a29874db8 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_12_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb39fde7d545d297d8831b1b446d471fadbe7857846953702e7e6112072dc39b +size 328799639 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_13_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_13_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c96146d687aca3dc0a0cd9fc98fb9c40091a1e4 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_13_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e0f345ccf4c79f129ce378bceb5c15186e8ceed305e1b8bf0ec684732409dc7 +size 328799639 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_14_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_14_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..59496d6c00d7c5000795818d88a4ccf125fdb042 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_14_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02a9404fd6857d4701cef57b233962525a49fe85e95e117ddfde11b394083ac5 +size 328799639 diff --git a/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_15_optim_states.pt b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_15_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..05884d82b8ac613a5d75f3c32679d0d15d8083a4 --- /dev/null +++ b/8b7178b25bopt/global_step115146/bf16_zero_pp_rank_9_mp_rank_15_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb7e8a2cf5f94f6e046859767366c4c574994b7de2b338387a48ed0e669464c2 +size 328799639 diff --git a/8b7178b25bopt/global_step115146/layer_01-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_01-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..34322d4412ef4b171e152d44dfd6cc5c445387a9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_01-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58cc354f0094d7167a4cb453c735c05cbda9202f126b21363d555085a09009dd +size 105514243 diff --git a/8b7178b25bopt/global_step115146/layer_01-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_01-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca4314bd4440a0d2f74ae5bafd6a09a652b4fcbe --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_01-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:389f4c80e53e86029d6a7522749e35b7912de74c190be0db79fe27dcf180a9ed +size 105514243 diff --git a/8b7178b25bopt/global_step115146/layer_01-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_01-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a6805cb6bd070c61d65b7e9e9046b7ed1a6647d --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_01-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c4ef41bf7e745295923d77b7cfed4a465a91f2cff6b532a35f53e3de6128e5 +size 105514243 diff --git a/8b7178b25bopt/global_step115146/layer_01-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_01-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ca2be0bf3df45a7777050e8b44bb2cbcba4fb13 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_01-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22849c4d357245379551fce291bea2621ec4925b5656c1ba662012d484502e36 +size 105514243 diff --git a/8b7178b25bopt/global_step115146/layer_03-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_03-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9abd97aadb9bd4ca06d90726ab1ea2ff610dadfc --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_03-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6119a52b87a4a9281a3e72c5a96ce6b4a86d9fc086517c9d4740f5b5e31329dc +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_03-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_03-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..14f3a61d23cd2d17edfb3de8c2d0c8e75563b6c7 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_03-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9907adea4c32d09832eb95f9b576123159c69bff9b466df784db24c021a5b0a6 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_03-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_03-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8149941b702719878cb6adbe0d4dea2860331a93 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_03-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf9a348bc1c350b03400bc15c420b5054a65eb766e0599f38f3416f7f13acacb +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_03-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_03-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd2812c7e9a6caa37d0169b5ba92e2e67a1aef37 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_03-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b89d472a888f6671d5f221a83cc82bd733ff9ca6bc1d9e62e48aec26a6487e6 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_04-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_04-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..25a0cef3c5ec08d0adcb030367cf3a5f1b40bc2e --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_04-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbf33ed17350af317da2766308998417d17574e3160e31ab7c99e09b8bf2c67e +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_04-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_04-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..06e0aa52ae6591cff3b42a60e1d7ac1b1f3fc8ee --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_04-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c937fc0b038ec4f31bad5d696fb31807cf01a92c326033acc4073ce1b08595 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_04-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_04-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..991f2dbc1a11fede110cd97ade55ab29cb0a9734 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_04-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30b640d239c8444d7f9aee672e0f819de15406897a3158ccfb28240d5bc7ccc2 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_04-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_04-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..036f8e9a4c397b3f22756f32133d18cbb9494270 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_04-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99bc3efa407907a40a3362b97311d9b4afb39ec84e0c57a8ef7486ca93642218 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_05-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_05-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd8aa9ba75e50b7de95befd56d12e858c9ff8a82 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_05-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc386b1d33c61469643399e8e2e0b6c1e7a0730ceca35939fc3fff576cef4ace +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_05-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_05-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..31ba107cd55e12900f834555916c4ad67cd8ed81 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_05-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f63d61d8379f01f7cbd3a8903a961c4c9bdb59cd51c90a04b25b30c809f6a07 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_05-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_05-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..995634855d6a147f2f8c4aa56a8041cd4527ffdd --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_05-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c7bf5ade1d21131846fa85cdb31c8e28cbf9db484307c27df3d7c0c5d1398ee +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_05-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_05-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c245e9b06dec6ebebd998a632e8e984fb323bd5 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_05-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f56a405e927cbcfa08f6fd65c11b7e307e324d56ca3ba9b3f5c079730953cbed +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_06-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_06-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7381c3cc66e44e62f66f372a325e3ec0a6492c6d --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_06-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f32b4e63787765d65212a92eee0a971d3bcb03a72000fa224e2e0c2334cd16 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_06-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_06-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..efe9cc1f79ce2a4a39860a92522a46741a211f1f --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_06-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bb4629f47c5e1ebcc42eb67ee1b760bf002b2123e6886022e16949bf7c05957 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_06-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_06-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ada5982f93d65cb3d188c4df92248fee9e929ac --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_06-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:383a6c035d0982d3ebd7bd2db5e8ec7b34e2b94f4ec83f5cc156461b37a3b03a +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_06-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_06-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..09b8f04fd0b148e05beffe8cdac78e685ad9905c --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_06-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4738ea82cda6866f36b067c7dc2b04bd02b225df5382d8701263c4cc3fc38f0e +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_07-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_07-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..029ee894d082bb142b1f6340301844d1fb313c2d --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_07-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb08aaca81c7851765ad8d5647d8d55b3a6e60ec2e696abf1a2939706e2d7ecc +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_07-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_07-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ab429f812eccd5f69c288c44df6e0c3a3a57aed --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_07-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f854b2293fe93152c9cd1aebf82efe6e8b9d72ebf58b482be0689d859696c9b6 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_07-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_07-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..98d882baa4162ae55ace2c0ea07250face3e1044 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_07-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9002393e30be22e5d9ba18c1d19654fdf5e6a0a19a4053db51b866f39a397c79 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_07-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_07-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c27a13162a0efd2ece515d1d810ce66e666a326d --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_07-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e7ef59a1fb0a5d3b92b3fd2ce600b3cc1c929d2c40f9f437de6f3cdbc61048 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_08-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_08-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d497c14ff313b53c5a3b8681a929c79bb11d6199 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_08-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f2904551eab5e8b8fc6a1c5bad518a14f1561d10ab52db5dfa77bbea8ff2c8b +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_08-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_08-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..12f8e677ac2a39a32fc183bb03ead68e6dfff619 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_08-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f68afbb058e7f6c1ea62b1ec1730dbe2cb4dd4cf06c3245ff1c42c49cedaf61 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_08-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_08-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..07fd83d3224b459f4d37ebcd70a8534d51c26e21 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_08-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:654c6d1efe9c8a45ccb8069c7b91c6b0f76ea44949b0a0fa4801c9dc3ef4d04f +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_08-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_08-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f58dde3ace390a0a04ccbab05fd23f840f2e850 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_08-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59c1451e1ce3192508a97bb5e7ddb81e9ac616aa144c75e44fd4990a3d85149f +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_09-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_09-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..15cf80a424b4de3ec14d640e79b1ed126e15235b --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_09-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9c51c0f0ce5194ae73ef732d7174846da907877ffd18a3fe8ce425450de3451 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_09-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_09-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..99c63b51ebb792a65dc9110f7ece68efea06e25b --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_09-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a01051fa7ae3ef811da22643ce017aecbec4b2ed3c5c915c72db2986eaeac719 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_09-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_09-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a5338d1fb66a71e3ab214dc38734e504524b612 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_09-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4e6f1757caa96840f99b758b35a65f7edda9c81b89ac4a5629d5735593e85f +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_09-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_09-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b230669c1caef2a8962c5af739a13356bed5b9be --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_09-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0879636405c2eb089c3fc3e5c71c904cb32912b25f5b2a56e03f1d7613721aaf +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_10-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_10-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..16a034f023a03ca4a1d697ed10cd84fd9fe2f296 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_10-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cff4b2e8191b54705e40fe2ce54675bb0b4d0d74cb3023de7867adfd7095f06 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_10-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_10-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d0142414c33117ed504164aa2ab67d50961c59c --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_10-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e08429b480c32f12dec33c26e075b0d37df87c0160cc00a18154e5b5ec8ee639 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_10-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_10-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9349df9bef25d12bf18b8f9a75c3d55f42cb76a0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_10-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4282af0bd0646a9ff40f4aee706e764ad1f01026cb92362578907ebf210f9af7 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_10-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_10-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2de6f298cea8646b3d78b172599be14f97c7759 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_10-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65be6953230435e565f2b29853df832374e1815af7a3d96f4019646583d57339 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_11-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_11-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd7b386352e0cd31a149a6ac310abfceaedb9087 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_11-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf9bd8555b4fce3ad58b905f804b1e6d979de2807933757eab239f67df97d2b +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_11-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_11-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f084f63070dd7a7763b9e1ed337ed5610a453ee2 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_11-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff5d7d81160e42d9b25f0711054a4591f3254bbeb0a62c15fded1d454de4e0d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_11-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_11-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..93e51a1b8c70f79d1918c6e7a8bd8635177003dc --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_11-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec8271a844fb043dae12e0f9ef2ade7a1403fd7bdaf7fa60bb9c5e72ad6e37b8 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_11-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_11-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..064df4ea3820bf6e3acc2ed21624ab881b5ae022 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_11-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:981c91fb269be45b4ebf0ff87cfaa8f2537109b3b3f7a6a9463e4751c7601798 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_12-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_12-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7419c72d20f769550688e2a9dcaed3b1bb2c3cf3 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_12-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc8bbddad1f93cc0bd6681347e470de1141a93fb61fc94459c3122bb33cf2d20 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_12-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_12-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0adcdbda7535166633e4390c0c54525d27350ecc --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_12-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4fe3451d8750dd6853cb2c01b79478e378c40e791a53aa040e0ee14ab2c9fb5 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_12-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_12-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d226d6e9b409748d105435f8ad6b94faf1b0d63a --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_12-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29caf9d6289a81b2b9f71dc4560f4b585e6ee5281e6f82e711c84226caa3c3e4 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_12-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_12-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b7bb1fabcf7f6e1a1c984957a25720b141f7d7f --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_12-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d8c5567d215c2f90264988739dbe28ebc42e9e62bf47e83c7bbb4c4f5baecb +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_13-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_13-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..586c12848f8173d67f2dc7ccf2038f3fb478862b --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_13-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ac04fb4ad9cb30420746f2cb355fe08cfaab6d89bb2c68c966ec55613caa8d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_13-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_13-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2abbd00c1a388c5b42b821a47a66accfc04fd579 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_13-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca7be1bce743de1dbe754195987d5705eb8d6c9a60bb7cbe364a93a4fe87aff0 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_13-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_13-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec864e0a295dd40056de2677a9ede7722e9a8e89 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_13-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af456a3b56174d43ca0fc1b7bc9693e0e08232c9e1d0b4fbbb26e956a8bb7d0 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_13-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_13-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3fed1c08d01bbc6ad54ec6d895b9f917aba4fe5 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_13-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69a11f4846d2c52275420df2ed1ba3c1211068f236328406aa877da9284983ab +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_14-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_14-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee2013ac2239de60eb6c8e4ba786e1a7cf766654 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_14-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3918063a0f1b085a024738d1fb1861849af58324f96c1ebb8c318de664867e9d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_14-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_14-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0440df234f6a4a937b43ed8df786d001edbe7677 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_14-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8534ca485ddde9a87cffc38481628ac908ae84bfc938a2835ef5dd94586b5004 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_14-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_14-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..48cef0e720e0d6fe090f64303e834a5b2d61e121 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_14-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e243ffae23aa5c41e6457fe0880c37709c7a83637f69f48d120bade4e10fec3b +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_14-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_14-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a8448ae4164a7a401b68c75adc81526d2d14f14 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_14-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46c63515101eb6daed31896782836d1ac35372cb327cfa9ce75d2fccdc4b89f +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_15-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_15-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..350badaa455e31ad9d18fbe323619685385a9dca --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_15-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99db1fd5c46cabeb493f0b50f957c098fc8a57ec906415c37579a409b108157e +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_15-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_15-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b832319ff5b6694819bda72c29f28a2893dec6b --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_15-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:126dec04db0c0f65d2b5518e1d5cba6dbda8ffef9bcd1523001379d0b940fa12 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_15-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_15-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..056d3151e0421e665d5df3f64ac15e103f0afd3e --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_15-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27130a94b3984feb19b7de0dddfb5769fa6144d592bab54133de451cb890bacd +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_15-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_15-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..01e408f6edc39de53eee07de0f8ceeff866e4323 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_15-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f71172b42b44c1fdd5335159e22915014e26948a3c733dcfc9873a439f4d053d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_16-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_16-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5b0d9f92c37886870cdf2183517f8b02c06b7c8 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_16-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2c4c9a09af4947cc8d1c5d2ad36eb42654a05e3d1bb5b2544239f16c9b88dc +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_16-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_16-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2eb27282d8f2df94ae0192aa96a98474ce92a120 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_16-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d81528f1477a8b7d7408fc35b90b85c64194492579e6e78e1ee418f322694ed +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_16-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_16-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e159dca830f2514569919d7cb56d01dceba48934 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_16-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f08548a875e6b1f325f24a8a37e8039454f61359d5ad5233b9d07e3f742255f1 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_16-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_16-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..55028c4a0d8df7422497cdd51832e3fdf03bd946 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_16-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb82d3509aa48524205a60df73b43dd1fa2f9c3c62268af4ca89a9778fe34057 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_17-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_17-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2a05b4bbe201109e4e3a79e33f462f0298d5be5 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_17-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ecb6cc996262bf6905e83b0feaf9cbb1ed86875513b79272529d4d3da86ab4 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_17-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_17-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..54c0f01e3abbc20dbfbca2349f17c18ffb99087a --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_17-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73e94eada5498765014b2d4dce5c23a769080e68f26afa7e691fe311b4ec4e5f +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_17-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_17-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..827464a8f6fb0c9089a9082cf837dc217088d3d0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_17-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6361939ecd7be2a82c6a3db9b2b2910cfe160d153aed6c0faa87dfc1f9d94be +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_17-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_17-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..91b26dc82cf0708de4d13e9ca350fef1a708b34d --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_17-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e6e773e3fc16d15fe8141f1f29903f3b742507033d8ccd4986b61e35af0ab52 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_18-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_18-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f57f93adb6599f2edf4eb5637a69230021c388e --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_18-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:796016f6bd22b5ff3397f4f0af3e745428821dedb6680bb036f7dabd10da867d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_18-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_18-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c269992237364066eb28196c8abd3a97b2065855 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_18-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc59212bebc378545463b7957f2570f46bd597a2c679d48ff341a292fe99460d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_18-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_18-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..484613f709379028f02680e36bfe8dcc94ec1862 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_18-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03a657726f6b89f3904940199345602a7c41c62489fa4623a46eedbda58118c +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_18-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_18-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e26293f497ef264ed976ab4e86c4477261be386 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_18-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f182b2a5a18f1e7ba21b7ba29fa7135b7929b5ccd48b5d8deb4d418581dcd23f +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_19-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_19-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e30f4ee62fc3d75c4965da0240de2bcd8f39dd2 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_19-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c284e93132c491801598395c93c6fd3ccbdc79a09c4a99dc858b53cc1f8347f0 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_19-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_19-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a35852ed4746dc9c4f0a24bbfde8855be84911e9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_19-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeed9686f7c2f50258239e61820756eaa35b010089523452c2bd391041fdeb03 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_19-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_19-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..140b2ceb5bb1f2efe9e524804f2fe2bfd73e97af --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_19-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d324493a260533d318d70bfa39973b8184bfc8d09d8e8d4da28de9575c858c4a +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_19-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_19-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..05308e20aa0927ae454b0a8d069196f737bdfa26 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_19-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2480e61dd165cf9b145f4557276599c8ffcccd5001565d80563a3368182aaf0 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_20-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_20-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..75332e4e5d3932340ba370618787bbda746da6ff --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_20-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca16d6709ea2972125e5382996aff28e1b78a006cb684ff67c9c1fcb2a72161f +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_20-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_20-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f27bbb7ccca6d16e6aa4728b35c05523b2bb3a33 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_20-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:813e7a230e36c99c44e54398b0b23dfbc95b551594d21b4c95ec484ad765d70a +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_20-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_20-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c37830f22793f3bae3bbf91c245425d364c23b7f --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_20-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f248639d54e94c00454278739f9ff847e6e9b2f3999d0820f10e2a33ddb376 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_20-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_20-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8b7dcff89ceb5f34bb5ae8ff01757630ad7cd85 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_20-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e15a913e59bd4d8396014895983a9badb374e666b5315a4d741c9a41b7f32d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_21-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_21-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d531b71b2f0a2886f8c08a61ef492ba87d74f55 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_21-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57eb22acee91990d0683e5ecdef522681a8a8b7cb16bdd2bfea06448fa261501 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_21-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_21-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..09139344a068799a2f17c4a27d9ae86c32a61d0f --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_21-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fe8af4f6f370848841f7a93d0caba463f739feaa8c2fa1043dc93e3741a8197 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_21-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_21-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b76c45bd57ee92c79e1f609f20285de0caf2afe --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_21-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b6548bf5d0e21584a8455c017f66c39190c5a2ddd94a9a1177fa6900016a373 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_21-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_21-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5cc700ede82e456dc4ced34e304e143299744068 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_21-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa7fe835c37878f7b113d794d4209d901619d7e562b64ac4381a04e9ffb2c66 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_22-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_22-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..45ba81804f22c5cfd14e487c0e89a849d39e6508 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_22-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24ad929140caf315ca7a4e257d56ebbecaa2aaa93def5be8c3a87a7c2ac7ef7c +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_22-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_22-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..676fe615cc64e0356d4c98a5c3fc768f6b1353e6 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_22-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae2357fa0973e7a7f8e13f2115ed731bb00bd049e6cf10b4ba68308bf7adec8b +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_22-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_22-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc4d008405e5109fc25b3f18c51e5120426c9cce --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_22-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f983a529e458d84af04412614c3230dcd29d88d1a480dc2d6a6fc68e97e2ef6 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_22-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_22-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..35021a0be7dc39cfed5cb4159539ec8708c69dd5 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_22-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:013ddd74d96a716bc428c744b1b85a58cd140866ed14b43de505f7edb51bfc09 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_23-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_23-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba7c1c4e88b0a9ec18b87ec9a5f8d947ba1f8e9c --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_23-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5934858f2fa7723765a79bc208c9cd9e7276a7fa50d8065d5c39edb735ff2148 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_23-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_23-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ff1173421985b34d917ff11b886653e6359ab1a --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_23-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b4de5b193ac29d92b1cebcc534ee3bafc4345ad7fbee9f1b6f4a42f5fd8a9d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_23-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_23-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e06181a540e798acb87d6cdf958e1afbc379cf9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_23-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cbf6aa94f5db95efdf907c42e448165102fb381a735913ad497506ffed63486 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_23-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_23-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..682b186e2e6c41dbc94cae528d4f1f77f72a0402 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_23-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e77c547a670688c70e459f6acf843f1c94fbe8e0ce0e9bf5a18e33b2bd5b3a +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_24-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_24-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..88c844419657982c66a2e6cfceeb65f7873b4f90 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_24-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b425d014dc658da7953edd889edb4984f447f969e61d9b53076936139b49dfff +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_24-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_24-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8aa5295a324a19bb5a07749c00e7e2e629ef7bd --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_24-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712f79c3df7b69b166dbbde32ca4d817a17e45b71d88ff66ed9653e190c243d5 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_24-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_24-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4ca4bd555e67fb68bab74f50dc4cd52e40dc91b --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_24-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22fc23ac260484dee349560c0aacceaba01014c7708d4c658a140f06a769a2d5 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_24-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_24-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..890f01ca342f7a4b5506d797a9eb658ee5ef8af1 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_24-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ee7137a313543fda2fad48b6716efc0fa51dd1381893274b26ba5b3c935161e +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_25-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_25-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cd19dc6ddd2e194462d4a200b12f14992802e14 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_25-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0cc5320687be41c9b7339dd96d33a4a22a8cd8388b19d219d39b90670c90ea9 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_25-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_25-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb41e62033658f27b580f3e11acfafb85cbe4f33 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_25-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8882ce003bda38355beee52d930a1ca2213c790699a95e14d7ceb9226e46b9c1 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_25-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_25-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7695b4cb200c9fa147092e37a30b5de2f095715d --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_25-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e33154ad66873572c5beee0120bb387096c0ef08bfd67bbb6a0b5dd21ab34289 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_25-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_25-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b225f9d1544777e6fe1a1f8975d2bf9719cb915 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_25-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30c3f88acf7dbd0afc16daf43c4a2eeeb35aa9aae808acad6ed9b80df1f7c63a +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_26-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_26-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fef7755438a9544679a532d2a3c75f29d2c36c7d --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_26-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d56a640c6a05878011c641f612b1251d265d901cac2b8ea437e6f0c8be85333 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_26-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_26-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a26d027e1fd035d73e85dc0602fbe2b5ffb3e590 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_26-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ad246abf0dba87ba73abe37c967292b39737bdb4eddc87f7dcef7b302a2578 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_26-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_26-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..18db8ed90b10b418b18fb530fc237322e8c14f93 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_26-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e31ea9a444f5a75bf733b8481f5ae91cc33fdc54dc554a8408c873f448ce85d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_26-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_26-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..37ed6834d86fcfd6fef1fb66b668abe1a9e69a6d --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_26-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dfe908887a59c21d0470a5da862e317e6c48a3a8c063601970e6c9a358138f9 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_27-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_27-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ec5d416c22e038607190b3caa121ec70398e983 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_27-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce84f0a9c3c38a4737c04a00a97b4f895a1eb398ee07a9c1ffc36be4a8bce34 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_27-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_27-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1aecf2a60144bf8af1bf980cbfe6a62d1052d745 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_27-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6567ea305a47382026f20b00a4ff8270eba35abb163a158b828bfe3dd7fcbcc +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_27-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_27-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f917710e4a5ac77d60d0dbe47b3f22a2dbce1535 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_27-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f91b774fc4a2a6ce49c018d806031e192a2887fc2a58f24fd5366ba4ba1e9a5 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_27-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_27-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d21bcc5cf1274f13e03376e00244f24a3daf639 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_27-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3072da4ae9281690f0551bfd40f4f11855d2d539a05e70876afa125aec6327c9 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_28-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_28-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f7ea9a786db1fb12b00ea8bb41aa7e4d5cba5e2 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_28-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fdfc18de156fd3c8ef812c65e572631f5ecec491b8674ed563aed772b133ed0 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_28-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_28-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..829814b4eaaede1249218d46d08be603ca9f016c --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_28-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e172db2ed5a89c81c853521bec8c762690bd0fd25ecc19b81b91dec1740cbd1 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_28-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_28-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5dbe8736d8d24a7f500639eea0760b528e0053e --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_28-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e24d88b2a48cfc2721699349db581f367e328c0da42733cf567e76348283e3d9 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_28-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_28-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5aab44e6365e51e1cdab9565c7bfb98fe60cf417 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_28-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5732e0670ba094a48278a5efd283c4fb8ea91dc260dd1c60701d530e21a05802 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_29-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_29-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2a05578d0192f1b39b6d280f0f4115cb7cdcc59 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_29-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2ed278a5541a8f32008e394b4095f650b82e4883ecd2739be875636bc31ada4 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_29-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_29-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e69837e8815a21d5c2226f7879e8aa09fa9e5148 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_29-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8f821cc4d468f462eb21843d9db3e59e230b531b7d58cac5e1e2d73bec0b191 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_29-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_29-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ec58542bf85c258af2985ced64c6ac6eb8d7927 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_29-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fa2fbe4bc381b5617ac1ac5f8a56c3de6ab0fbb353a15189e5afb0909816dbd +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_29-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_29-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4bac5d28bdf687c4e8027f33d262996babcac51b --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_29-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2c43337d461ebf4930ac4a6e90918430b0f7df8a391896cdcf1275a6507dda +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_30-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_30-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7cdebe9e811d0e240e1327abe099c973904cf38 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_30-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faa68439645a86bf507647f75a0029af7be778c515b32589a686e6c56fb10b90 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_30-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_30-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..20bb13919a2cf4dceb3a2fb508913692f2b45204 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_30-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f802df4a28501d88cfe720d11b6259e951086758e7f73355c1dc695428dcaf53 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_30-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_30-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..90a558d88eaeede07dddc0c6b3c98c53464f7dd0 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_30-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e8a76437fdc383d8c7ce9a191d800415b2d33d4d53a2002dad700ec719ecff2 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_30-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_30-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9db0e7f4b3417f33bfc811b3d5257ece20922d9e --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_30-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f253503849f0bad48955fa988cce7700ad7801f843f0ae1ab21899f1a1fa820 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_31-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_31-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a980c32d336a5e01f334caf93c741f48ff706949 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_31-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c01cb515e7ea2a4c20ec7e04321840929d13ef3ce69ce3559da98505ae8e0fd4 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_31-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_31-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8dcc33e706644bc836c7662c259dd2bf99b1c4f7 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_31-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b0e13b356486b16a62b4fc3934c00ea306ae7257d619a991676add96f7c2be9 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_31-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_31-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..75654f15428d63d60518ac72f3df35d3c5dbb127 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_31-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede5141f1ab39af4cdab7e285dbebfac511416e9a24dc727e1b38088d0ec300d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_31-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_31-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..36ebce3df33aaae969dbb455433e07861f040d09 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_31-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23f490fb316a8d1f17070aed7e49c86fa648cb17b51f913f5be4a75a6e640392 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_32-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_32-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..89ae979818463cc5a9628d9809f14216a7fbeb66 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_32-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b05de08e8ea895e1a3be2a2d967cfcdf6c2274f0c589ebb37a73b317202b39b +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_32-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_32-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c13e369b21764a6e184d9bc525c4dceec9e2e21a --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_32-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8439e54e8a5e306ec730e9405ce93850d1a1244c8541e543a3200e74b6d1bf7 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_32-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_32-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4eeb84f80f881997ffc6dfe794dc91597191159 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_32-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3012fccd5aeb6b5956e82f3d252a8d50c45925c03beb16df4f637519b099afb +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_32-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_32-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8307548b8cd8ba78113db0e9a42e8f7432421067 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_32-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b94225a2fa59c22021397d382438bdb5d3d07fc5d9efcf14d7b24c04a8755876 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_33-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_33-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0237d2eabffd89d4db1b6b3a236bb15119ecc931 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_33-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4083c622d157b870bfe92bdf34974fe63ca9acda2aacb92f255f85e0ed439c6 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_33-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_33-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9b2de7e8272157cdc0ea6356254d54159b79791 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_33-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df53ca32d1a0697a7eba2b5f233c622e0b23ffff17f71c718078d697877df57 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_33-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_33-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..51212263d09a845f2e3da70949d5b1c62a946647 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_33-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60b4054580ed384224f73228156560f66d0e4b5eccb2dfa8825530ff597a2316 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_33-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_33-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecd4bfc482654e86e3f64c81d22545d14ae6767a --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_33-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c5a3fe03fc69ba2d4e7cafd905848c8ff0ada691eb0d755ba87a15bbbeb70b2 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_34-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_34-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a22f8d18c46d33c8575bc54678c4a120d028af53 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_34-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdc3c784c783c6baf70e7e7a48293443d389d7893a1063ab4ddab5bcf00cbeb3 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_34-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_34-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2a13da9c24f2cee88508148154781a8207dfc05 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_34-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e085d11840afbe99900f228439fba621f59968ff2aa3b83603076f96ba82df6 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_34-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_34-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a82077dbb9426f90367bac574afb4d8fc6fa850 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_34-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e79e1b77d20be79a9bbb9e6c701c7cbb91c74dbb5f5a56f4d8154999004912 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_34-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_34-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2410c12b03aed4d6de628b57785bc0449a5611a1 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_34-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14737b03105569b1863c7170ee3d51a77f10566047307e975291dfcb24b91613 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_35-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_35-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e83cc6f0221f05283758f2cad171dc19939e78a9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_35-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:296488ebec08839fbd9fe1c0c9e1b92e6ad06c8591160f6a619e77c4230c30c3 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_35-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_35-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf07ad626eaa30225c1455ce2b5260d98182b4d4 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_35-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b227108ab0333aa79ca3ea2d949938654c537382dbaf65ec5131b9b71bee312d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_35-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_35-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..789c1cdde48ad49ff33859d43bbdc324812399c9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_35-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fde7009c54cdb564554be403ab5d847eca2433041167e91d901a2a9660b4e28 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_35-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_35-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1f6d5eb5bab87c1d9dba4e3eed06f1827de791d --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_35-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb589c14d055a5a0594a709343f257d2c20cd92d2a05e899a33fca8d522c7aa2 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_36-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_36-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e3058454049f8036231a90008c63176621e0910 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_36-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a18f693b42574d8b5f9d27fcf79241f30720bec6d25c0c57502979faaa3f1a4d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_36-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_36-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f42a9f98be64372d102f75ffbd5870aae8942f8a --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_36-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5195755abaa233471a7eda52b7c69dff776a7df5e64c430be9b32ec71a648e41 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_36-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_36-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2f0442a76795f7e4151b8a0878f7e75e188485c --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_36-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c0c0f137502fc7fceec9320008f32b1d17f6ec406c4aba2d743983ce5ad7c0 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_36-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_36-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eea1a5108bf650274cdf390c512fd5c2fb293b66 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_36-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02839214eae670009e405f81b66224e193597646b4717f6a2c8cd50832d8ad44 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_37-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_37-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cdae29b1faadc04f041311286350a915fb3cbfb --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_37-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f98fdab6d68ea1dfe1b3b1280e750f9b5309a34cfbad66c80b560ff072b47e4 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_37-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_37-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ebee79a71429a21348c327b81c864a0836c2967 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_37-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff4804221fdeb50872630ce903908370a435990859f18769dab6f98d33e3add2 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_37-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_37-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..388e9bca58d6dda0700afba3364c861f9d1b24ba --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_37-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11e94867f0580382942b5bf06d40f7a7571d0c8202bcf2a45dac20f0809983f0 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_37-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_37-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..73f1841c51eb0d5c45b14020219c6c3960380cde --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_37-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac354a0f3f8daf96aef5144eb3e78d2852c5ea33d26316f7c877be73c5aec7e +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_38-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_38-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd8dcd459d0733378cb7dbdbfe681a299201eb57 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_38-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c97cac618084b93cd3f2906b51252f4aab9fcebad42cbd61f50501e450fd5a95 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_38-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_38-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6ae84b011c9cb5efcfd4afb7a9fef2c90f42c45 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_38-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4224ea8d4b4bc10b52719653b318c7e3543157281d115868c69602484fb972e +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_38-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_38-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..77014f5013ca1e1bf72d808fa1dd58d34bf33fce --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_38-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f008877dea9edf9e3ea800a3c1c652c948c6629d5b6c30e201b69d0dc414ab84 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_38-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_38-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d87aa4a5dd176dd68ac30009bc222010cba1e5f5 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_38-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:901f84a281c54e0d6c9c44567b819c7d59a754221582d7f0cd9928b1927e545e +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_39-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_39-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..45685459f6e81b65c53acfca282e432cf2ec5176 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_39-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5f1f9e5e0d747ccf5397342cbf5bac7ca5d8fd2dad933caf439dd06c4fd5648 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_39-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_39-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f96d7fafd81ab562d380025c276a00d52138f5b8 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_39-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d119098831c20726898fd8b2a31203747443a949d0cf4ec074dbcb04a5d07d6 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_39-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_39-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6cbc4786ec000020447ea926cdd8dfe02273fef --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_39-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee7164b02d0d0ba512d9e82eb994a8e468b6556f182e0e504e6591117a1a11e +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_39-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_39-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f6c0f4799b4d8eda101ac9cc5f864d7dac8f305 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_39-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62b0da6c26c2eebc06d8e4d0e2aba707e5ac6bb1a79eb14de8d099e8ab1bd05f +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_40-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_40-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b87ee6198032a85b7e66d0a7913af70ff1a1a34d --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_40-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3507729593e417873de93cae6b740604fccf80eae1c8f85691243f2258809d64 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_40-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_40-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d56d49874efe96adc3e44359280962e8fa866eda --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_40-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f2f1dc14a7fd99f6c657ac0f9341025fbbfd1eda489035bba9c3573229c902 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_40-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_40-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e33eee337e806ace956bc75c7289b1f0b50358a5 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_40-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d9bb007143b4fff00a47778a5d7dcd809b1c49b4a7de72f2e8728cb24d6c8cc +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_40-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_40-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f22233efa5c1a6506edc4cc390bae9b1895effe4 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_40-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55672553f90fe09652c232ceec532862bbc6a1739c621ac24fd1aacbf4d29a99 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_41-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_41-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..47962466854b560f7cea40c2dde51771d3ad2b34 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_41-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8297fa3f0b4f6a8460943fb2048658e9dd0effe1647003d6cc671f6e6a456442 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_41-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_41-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e2ae7eea6f468c9ca5c6ca111ab821eddb0d462 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_41-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b54ee62653a4dbccd1c4c63a195d36ff0b2e71655509381c435f2cd09276ce +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_41-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_41-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..36ed6c5e429b0ad042b0ddba2d6554d839d1025c --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_41-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:182cb6e5f410fc3829f235c128ce54204bc91fef7b5a921bc31b7496154632c0 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_41-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_41-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0712622d151dcd3fb8ad179d3b27b78f5ad09b4a --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_41-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8f0ce5de21a8106a0bd2c68099bcbd328f47102345aaca47e6e55201dcfef1a +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_42-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_42-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..44d39687fcf7c43daadd5feb0866fbdf0210b77f --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_42-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ef242564f4422ab57cc1c0dbb3e61150aadcfae97ae7897e4da54e147343fc +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_42-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_42-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d01b3aedb2febc39fb26ed0ee0c0c87e725fa9eb --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_42-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1888eca3c1f552e98ae50914279f8cabc2d31bdf4a31519b5680832279bf57 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_42-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_42-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ebb44513425ce8a9483ab640d6f09afc8e503da --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_42-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dcb91f237bb4c3489af4a296cbf4f87b3d07bdafa9b63aed13adaf16c96438d +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_42-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_42-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6a8fcc98d96cf8e9def18f5701f5b745afc63ba --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_42-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5a181153147d363b882745acc0ab1f93b973ddfcf027feec4edbc0c1701c5b3 +size 77130243 diff --git a/8b7178b25bopt/global_step115146/layer_44-model_00-model_states.pt b/8b7178b25bopt/global_step115146/layer_44-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c2e800b1f275bf9f5a4bafe3a583d65b4ad341b --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_44-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e433b04b1bbbd354ab9da5fd2d3cafb01d4ea032967e52b160b3e07dc6cc8c6 +size 15555 diff --git a/8b7178b25bopt/global_step115146/layer_44-model_01-model_states.pt b/8b7178b25bopt/global_step115146/layer_44-model_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..87f95836d561ccf87645c4ad2d80841269c9b197 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_44-model_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5de05ab8b309e3be0f24a4796a849be67d5e7bca82353989e02177dd693870b9 +size 15555 diff --git a/8b7178b25bopt/global_step115146/layer_44-model_02-model_states.pt b/8b7178b25bopt/global_step115146/layer_44-model_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..081d2d9300e0bd54042f5e1fda5127fc2b3e845a --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_44-model_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd478b09d2a3eba752bd8ae4b4dee61f66b0dc139282f5e94dc1f27016be0b6 +size 15555 diff --git a/8b7178b25bopt/global_step115146/layer_44-model_03-model_states.pt b/8b7178b25bopt/global_step115146/layer_44-model_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3420889ad8bc4365b62595e1a56706ed40546c03 --- /dev/null +++ b/8b7178b25bopt/global_step115146/layer_44-model_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2e5ef854855160772292771f3c2d8f6f7ee99cfdc74ab3ac45008b1fa4566a9 +size 15555 diff --git a/8b7178b25bopt/global_step115146/mp_rank_00_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4a5be04e53475dba768c136f6f6ba27bfd5bad6 --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2a7da581a1e34dc012a1c8f282ed643b725ec0f03653c23daca99d79254f0df +size 31603 diff --git a/8b7178b25bopt/global_step115146/mp_rank_01_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_01_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd6a70c5c5f709987848e79cc28d66e07375a9f1 --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_01_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ccb0d47b4e9d5ead80a55a0dfc9d580110eb1c59c483d3ff8ce9d6b32cc2cc +size 31603 diff --git a/8b7178b25bopt/global_step115146/mp_rank_02_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_02_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec82b8a896446174349a7244323a1e9103d0a53c --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_02_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf656c93083ee45e21d1828a4f7d9745f056e1e230f5a9807ceca7fadaf1b16f +size 31603 diff --git a/8b7178b25bopt/global_step115146/mp_rank_03_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_03_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..16b0119f90712ada64a30635693ee3c33d6271ef --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_03_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e5fe51c2afb9e2f5c708de53738c1ee5f0b94e046ade441f36f8d8f54915e4 +size 31603 diff --git a/8b7178b25bopt/global_step115146/mp_rank_04_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_04_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..51b46298f0c3c9f751300ef74c5bb40c608bdeca --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_04_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68848aaf1b7f4292f45d981cb4ff8f8722c096b805f2ff966397994508ba7b5 +size 31475 diff --git a/8b7178b25bopt/global_step115146/mp_rank_05_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_05_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd32026289dc031b4dcf70b3278996ef77e913e1 --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_05_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:442f8c0034b29a1f1a8b27bf944022c9003c5e2d1fed3555e598929963c80902 +size 31475 diff --git a/8b7178b25bopt/global_step115146/mp_rank_06_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_06_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..76640fed89cc97b146154e6ee536a5bd2ec853e9 --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_06_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:643bcac6d64abe6166ec0b0c37df828931e06b4e129b2cbb874052af821e2c7c +size 31475 diff --git a/8b7178b25bopt/global_step115146/mp_rank_07_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_07_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..314cee0e689868cf7425dff5ee2a798c161c7894 --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_07_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93796eea7f51cc005f89ed3147eb40542a71d098732aaade3d60c3ff2ebcac48 +size 31475 diff --git a/8b7178b25bopt/global_step115146/mp_rank_08_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_08_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b27ecd34a58e4f636fd8f30bd87a98b4c0e8e36b --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_08_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9458de11784f9d40f365736b5bd894ba80034a3de8ae61a02c0c2f3df3fa472f +size 31539 diff --git a/8b7178b25bopt/global_step115146/mp_rank_09_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_09_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eff8cc81c1aa121ccffda43159b03f58d33e1b44 --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_09_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8672ff0d9e3d461abf807ac30b00a98df0f58d83faab59069b16e78be5c664b +size 31539 diff --git a/8b7178b25bopt/global_step115146/mp_rank_10_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_10_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f952de7aacf590f5b3ada36794c01686ed739aef --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_10_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43af2c22eb5272b6af3d2c427b888d48441041e9f27f34cdce91c0c1dfdef78f +size 31539 diff --git a/8b7178b25bopt/global_step115146/mp_rank_11_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_11_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9c7ab627a0ad8a85766c8f3ec2ead19f868a47f --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_11_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc2b35ac6f4b55cc13bcb9640abb789760e63d8e8fa743b2e5385c08171cd370 +size 31539 diff --git a/8b7178b25bopt/global_step115146/mp_rank_12_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_12_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..95679db396fd1d9bd743f3c9e50731d6134a6697 --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_12_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c059febdb8feb4854ff7b9da19766a8a9dea611032ad7ec14678a1c15637307 +size 31731 diff --git a/8b7178b25bopt/global_step115146/mp_rank_13_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_13_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6226fa492202dfe623156234c84a0a9e240d9a24 --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_13_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff9c067e56c35b4d272ff9c9f3366f310951b2ff1c7a7ed63472d2e96784ea10 +size 31731 diff --git a/8b7178b25bopt/global_step115146/mp_rank_14_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_14_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..589005238c7be05d3e142c518ebd4404ea41d329 --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_14_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4ba69273118509990c11d8ec981d03d921b08b9e3152c5f2e2f9c63d727e979 +size 31731 diff --git a/8b7178b25bopt/global_step115146/mp_rank_15_model_states.pt b/8b7178b25bopt/global_step115146/mp_rank_15_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..da65e8d9729e9d106daf1993a578dfab54abece2 --- /dev/null +++ b/8b7178b25bopt/global_step115146/mp_rank_15_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4012946107c22aa452180ec6d0650519ea54b5db1047d9dfa4ecfe8f583c4993 +size 31731 diff --git a/8b7178b25bopt/latest b/8b7178b25bopt/latest new file mode 100644 index 0000000000000000000000000000000000000000..572d18ce2f5812ddc79da27003b8e453db249430 --- /dev/null +++ b/8b7178b25bopt/latest @@ -0,0 +1 @@ +global_step115146 \ No newline at end of file diff --git a/8b7178b25bopt/sbatch_8b7178b25bopt.sh b/8b7178b25bopt/sbatch_8b7178b25bopt.sh new file mode 100755 index 0000000000000000000000000000000000000000..f13a2cba5b1dbd756f5d2405827fd6f1c86d8a6f --- /dev/null +++ b/8b7178b25bopt/sbatch_8b7178b25bopt.sh @@ -0,0 +1,166 @@ +#!/bin/bash +#SBATCH --exclude=nid007542 +#SBATCH --nodes=32 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=8b7178b25bopt + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" + +TRAIN_DATA_PATH=train25b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_25B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=4 +TP_SIZE=4 + +MICRO_BATCH_SIZE=1 +GRADIENT_ACCUMULATION_STEPS=4 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_6796M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=5000 + +# Tokens: 178000000000 +# -> Samples: 86914062 +#TRAIN_SAMPLES=86_914_062 +TRAIN_SAMPLES=117_910_413 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 1_179_104 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/8b7178b25bopt/sbatch_8b7178b25boptval.sh b/8b7178b25bopt/sbatch_8b7178b25boptval.sh new file mode 100644 index 0000000000000000000000000000000000000000..96d84e182165f9f299ff279fd9ae591c071b0500 --- /dev/null +++ b/8b7178b25bopt/sbatch_8b7178b25boptval.sh @@ -0,0 +1,173 @@ +#!/bin/bash +#SBATCH --exclude=nid007542 +#SBATCH --nodes=32 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=8b7178b25boptval +VARIANT_CKPT=lm1-8b7-178b-c4-repetitions/8b7178b25bopt + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" + +TRAIN_DATA_PATH=train400m.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_12B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + +PP_SIZE=4 +TP_SIZE=4 + +MICRO_BATCH_SIZE=1 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=512 +#$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_6796M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=5000 + +# Tokens: 11522010000 +# -> Samples: 5625981 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --override-lr-scheduler \ + --reset-progress \ + --no-load-optim \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + --eval-only true \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + --num-workers 0 \ + --valid-num-workers 0 \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" + diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683213561.nid005976.103177.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683213561.nid005976.103177.0 new file mode 100644 index 0000000000000000000000000000000000000000..e91e03545d70f16790ac48795ddc50c0d9893b29 --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683213561.nid005976.103177.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fd96187f16b498535190f980ce1c93885df664c219d55126c9c1bd9b5207d68 +size 30592421 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683386375.nid006565.96207.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683386375.nid006565.96207.0 new file mode 100644 index 0000000000000000000000000000000000000000..ca2928b9004e31b3c42e10b32b066937f74f59e0 --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683386375.nid006565.96207.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc933ee4427f34f89f816a300020a7f9478f27a1e1f622f5488aad013b080f0a +size 30625570 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683559162.nid005348.32541.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683559162.nid005348.32541.0 new file mode 100644 index 0000000000000000000000000000000000000000..9b461dc7d01eb395a9bd8c42265cb3ec6995a5d1 --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683559162.nid005348.32541.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:489e2211ba68327ab8c9e8379af12975f82efc16e9cd20a94764ea9d5d2d80de +size 5529842 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683592046.nid006323.71350.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683592046.nid006323.71350.0 new file mode 100644 index 0000000000000000000000000000000000000000..8e37bf792ce0c3e405e18a517e66ff460691f2d4 --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683592046.nid006323.71350.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90759e77db14851835322ad2c06c8acd4d7dca47a4085e990a626f2fab820ba4 +size 40 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683592589.nid005787.87364.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683592589.nid005787.87364.0 new file mode 100644 index 0000000000000000000000000000000000000000..6b79984511f3a1c1ce4305477683901858fe43b7 --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683592589.nid005787.87364.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a6e30954e93f54d9b71030ec765a3b7a6744cebf6c09aa881fcebae3b88938 +size 40 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683593118.nid006037.3135.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683593118.nid006037.3135.0 new file mode 100644 index 0000000000000000000000000000000000000000..13695be6eb812c1a66968fc07ce0b5927c3edd3f --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683593118.nid006037.3135.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181757844171bbced84c48ff913f3509419313c272d2155aabba682315c67652 +size 40 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683593665.nid006741.39820.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683593665.nid006741.39820.0 new file mode 100644 index 0000000000000000000000000000000000000000..b87c622d561a5decc2d0527971b6db7ee0e3797b --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683593665.nid006741.39820.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08382e875941c08eba1d1c6193b336c1313036c07f13ee79a471930e698a88d3 +size 40 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683790949.nid006147.87345.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683790949.nid006147.87345.0 new file mode 100644 index 0000000000000000000000000000000000000000..7eafc00ac7654fc67460f7d7f16d98f6595d7a6b --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683790949.nid006147.87345.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb41fdb4cdc8db5e65cee7612c4a16af105f9173656a3464a5a161ce4f0bb5f3 +size 29944783 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683959666.nid006098.48926.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683959666.nid006098.48926.0 new file mode 100644 index 0000000000000000000000000000000000000000..d7223211cdbd6af20f30ff7605c0609761cdfd76 --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1683959666.nid006098.48926.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab587faf51010154ec4fd933fb3fa64fe1212aa90774f0417bf7b67071fa6b02 +size 30934506 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684132502.nid005646.16966.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684132502.nid005646.16966.0 new file mode 100644 index 0000000000000000000000000000000000000000..b828d530d18735854bb897c695328e9c23680f75 --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684132502.nid005646.16966.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:797b4128de8e6b224ecd7a83683e0d25c0b046e4189ca5c9f1a1aa089bc97823 +size 30350490 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684305320.nid007164.5260.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684305320.nid007164.5260.0 new file mode 100644 index 0000000000000000000000000000000000000000..e09d3b0ddff52778b1fa452cd16a619b644d6e5f --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684305320.nid007164.5260.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a391c631bfbc46c8005ceac311a02cc6a4781fed6ef93cd6945c3c9e6dd108c +size 30789122 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684478137.nid006905.52200.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684478137.nid006905.52200.0 new file mode 100644 index 0000000000000000000000000000000000000000..0a4a9cfcfdc002bbcc02595acc98ab3eb55ff977 --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684478137.nid006905.52200.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe6cf53e7e34ad4228ebe4205ca24166c779937e450aff60c4cd5300cd255d26 +size 30843575 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684650931.nid006673.118726.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684650931.nid006673.118726.0 new file mode 100644 index 0000000000000000000000000000000000000000..353be5974b07ec778de2b122d6d926b9ac147dfc --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684650931.nid006673.118726.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cece3bd114385d962a2505e63277e0efe362cba35f8c446e5092aa6c11271c3 +size 6393806 diff --git a/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684687835.nid006673.94013.0 b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684687835.nid006673.94013.0 new file mode 100644 index 0000000000000000000000000000000000000000..f9eb729c048d7985fe26821215c7b7a16c3dc965 --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25bopt/events.out.tfevents.1684687835.nid006673.94013.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72fb9015b609d50ea52a789a5e3071443591a150a2518810a497d4f20a48cfc1 +size 18264290 diff --git a/8b7178b25bopt/tensorboard_8b7178b25boptval/events.out.tfevents.1684845734.nid006103.9257.0 b/8b7178b25bopt/tensorboard_8b7178b25boptval/events.out.tfevents.1684845734.nid006103.9257.0 new file mode 100644 index 0000000000000000000000000000000000000000..4f5f86b353fbbece934ef6a7b64fbf5f27f683b7 --- /dev/null +++ b/8b7178b25bopt/tensorboard_8b7178b25boptval/events.out.tfevents.1684845734.nid006103.9257.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:043e96a01e553c0d66b477ca206ee18b48e99f0c110863186c7bd412999347f8 +size 980 diff --git a/8b7178b25bopt/transformers/config.json b/8b7178b25bopt/transformers/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d2e81ece1e995f45376a40c27819d8bf73b68bd0 --- /dev/null +++ b/8b7178b25bopt/transformers/config.json @@ -0,0 +1 @@ +{"vocab_size": 50688, "n_positions": 2048, "n_embd": 3584, "n_layer": 40, "n_head": 28, "n_inner": 14336, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": false, "model_type": "gpt2"} \ No newline at end of file diff --git a/8b7178b25bopt/transformers/pytorch_model.bin b/8b7178b25bopt/transformers/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..87eacb32df4065e381fcd8553fbd220d8d739e90 --- /dev/null +++ b/8b7178b25bopt/transformers/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36db4f85f5140eaaecfadbe025941bfd2d8631fcc14a99deb33f94b711f8597c +size 13048732401 diff --git a/8b7178b4b/3583607.err b/8b7178b4b/3583607.err new file mode 100644 index 0000000000000000000000000000000000000000..840249a25c7a7cd70e53f325674dad8681800ac1 --- /dev/null +++ b/8b7178b4b/3583607.err @@ -0,0 +1,3625 @@ +14: 2023-05-25 13:35:25.938836: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.938853: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.938879: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.938889: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.938919: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.938927: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.938954: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +14: 2023-05-25 13:35:25.938976: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +14: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.939771: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.939836: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.939883: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.939922: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.939941: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: 2023-05-25 13:35:25.939867: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.939841: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.939891: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.939905: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.939909: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 5: 2023-05-25 13:35:25.939956: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.939866: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.939884: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.939904: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.939952: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.939956: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.939983: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.939985: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: 2023-05-25 13:35:25.940017: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.939920: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.939930: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.939935: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.939950: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 3: 2023-05-25 13:35:25.940018: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.940264: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.940285: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.940295: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: 2023-05-25 13:35:25.940495: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940513: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940492: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.940319: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.940339: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.940323: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2: 2023-05-25 13:35:25.940369: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940587: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940629: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940638: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: 2023-05-25 13:35:25.940411: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940530: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940556: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940568: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.940824: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.940841: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.940822: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: 2023-05-25 13:35:25.940631: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +25: 2023-05-25 13:35:25.940630: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +25: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940705: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940712: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940735: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940802: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940838: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940847: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940858: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +16: 2023-05-25 13:35:25.940860: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +16: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940654: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940661: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.941028: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.941047: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.941052: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: 2023-05-25 13:35:25.940678: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940680: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +18: 2023-05-25 13:35:25.940700: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +18: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.940867: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.940889: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.940895: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.940910: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +23: 2023-05-25 13:35:25.940957: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +23: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.941073: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.941097: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.941103: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.941106: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 9: 2023-05-25 13:35:25.941113: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 9: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941467: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941480: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941495: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: 2023-05-25 13:35:25.941454: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.941496: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.941524: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: 2023-05-25 13:35:25.941590: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.941606: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.941609: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.941528: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.941544: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.941549: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.941559: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941512: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941519: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941700: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941712: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941731: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.941764: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.941773: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941533: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941534: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941742: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941756: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +21: 2023-05-25 13:35:25.941594: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +21: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +17: 2023-05-25 13:35:25.941543: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +17: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941758: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941766: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +28: 2023-05-25 13:35:25.941781: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +28: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.941598: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.941658: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.941784: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.941790: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.941823: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: 2023-05-25 13:35:25.941910: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.941878: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.941890: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.941673: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.941681: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: 2023-05-25 13:35:25.941679: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.941850: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.941855: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941843: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941861: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941873: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: 2023-05-25 13:35:25.941898: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.941903: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +13: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.941877: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.941886: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: 2023-05-25 13:35:25.941891: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: 2023-05-25 13:35:25.941948: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +22: 2023-05-25 13:35:25.941960: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +22: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +11: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.942045: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.942074: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.942085: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.942089: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.942107: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.942056: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.942059: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.942065: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: 2023-05-25 13:35:25.942160: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +10: 2023-05-25 13:35:25.942173: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +10: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942127: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942140: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942169: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: 2023-05-25 13:35:25.942206: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.942212: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.942202: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.942104: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.942122: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.942125: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.942131: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +27: 2023-05-25 13:35:25.942145: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +27: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.942263: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.942299: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.942307: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: 2023-05-25 13:35:25.942210: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.942237: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.942251: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942181: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942183: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.942171: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.942189: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.942178: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941892: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941883: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.942407: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.942421: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.942486: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: 2023-05-25 13:35:25.942321: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.942336: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.942340: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: 2023-05-25 13:35:25.942184: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942163: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941909: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941922: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: 2023-05-25 13:35:25.941925: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.942502: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.942262: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.942346: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.942267: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.942287: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:25.942205: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +29: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.942231: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.942235: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 8: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.942362: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.942369: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +12: 2023-05-25 13:35:25.942456: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: 2023-05-25 13:35:25.942317: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.942286: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: 2023-05-25 13:35:25.942333: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: 2023-05-25 13:35:25.942246: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.942255: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: 2023-05-25 13:35:25.942331: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +12: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +24: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +31: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 0: 2023-05-25 13:35:25.942716: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.942372: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.942387: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.942400: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.942370: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +15: 2023-05-25 13:35:25.942414: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +15: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943597: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943609: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943596: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943624: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943643: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943653: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943647: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: 2023-05-25 13:35:25.943692: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.943704: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.943713: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +26: 2023-05-25 13:35:25.943651: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +26: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.943735: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.943718: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.943719: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.943754: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +19: 2023-05-25 13:35:25.943755: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +19: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.943811: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.943822: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.943828: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.943812: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.943842: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.943858: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.943862: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 1: 2023-05-25 13:35:25.943854: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.943944: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.943953: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.943970: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.943980: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.943981: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.943963: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.943978: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.944001: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: 2023-05-25 13:35:25.943983: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.943992: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: 2023-05-25 13:35:25.943991: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.944002: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.944016: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.944018: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.944025: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +30: 2023-05-25 13:35:25.944034: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +30: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.944159: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.944162: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.944189: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.944192: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.944202: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.944211: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.944218: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +20: 2023-05-25 13:35:25.944212: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: 2023-05-25 13:35:25.944197: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.944212: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.944195: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +20: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.944224: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.944195: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.944241: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.944232: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. + 4: 2023-05-25 13:35:25.944237: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA + 4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +29: 2023-05-25 13:35:38.217859: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.217883: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.217928: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.217912: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.217941: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.217952: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.217962: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.217971: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:35:38.218764: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.218782: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.218803: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.218812: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.218824: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.218827: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.218839: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +29: 2023-05-25 13:35:38.218842: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.218359: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.218439: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.218377: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.218399: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.218454: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.218373: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.218411: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.218462: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.218422: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.218481: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.218402: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.218394: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:35:38.218316: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.218493: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.218422: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.218424: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.218491: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.218446: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.218440: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:35:38.218347: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.218508: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.218461: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:35:38.218387: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.218509: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:35:38.218470: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:35:38.218346: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.219143: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +21: 2023-05-25 13:35:38.219160: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.218477: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.219199: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.219217: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.218385: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.219187: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +21: 2023-05-25 13:35:38.219188: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +21: 2023-05-25 13:35:38.219199: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.218487: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:35:38.219229: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.218387: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.219211: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.219205: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.219231: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.219250: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.219243: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.219249: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.219254: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.219256: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.218402: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.219215: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.219270: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +27: 2023-05-25 13:35:38.219262: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.218426: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:35:38.219228: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.219286: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.219295: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.219259: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.219304: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 1: 2023-05-25 13:35:38.219312: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.219281: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.219287: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.219311: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.219321: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.219323: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.219338: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +24: 2023-05-25 13:35:38.219340: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.221307: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.221325: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.221347: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.221357: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.221368: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.221382: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.221371: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.221377: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:35:38.222297: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.222317: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.222329: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.222339: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.222348: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.222354: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.222356: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 8: 2023-05-25 13:35:38.222363: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.222260: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.222276: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.222289: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.222301: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.222300: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.222321: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.222318: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.222319: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:35:38.222787: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.222806: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.222816: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.222845: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.222841: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.222844: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.222854: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:35:38.222862: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.223030: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.223050: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.223065: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.223090: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.223109: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.223125: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.223117: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.223111: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:35:38.223625: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.223657: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.223666: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.223688: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.223693: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.223697: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.223706: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +12: 2023-05-25 13:35:38.223720: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.225019: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.225035: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.225049: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.225066: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.225068: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.225088: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.225082: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.225078: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:35:38.225596: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.225617: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.225623: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.225640: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.225643: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.225650: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.225650: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +13: 2023-05-25 13:35:38.225658: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.226553: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.226569: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.226580: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.226589: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.226596: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.226597: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.226613: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.226611: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:35:38.227120: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.227137: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.227149: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.227158: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.227163: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.227166: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.227176: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +16: 2023-05-25 13:35:38.227189: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.226960: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.226976: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.226988: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.227005: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.227016: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.227017: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.226996: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.227023: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:35:38.227488: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.227507: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.227520: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.227529: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.227538: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.227537: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.227546: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +15: 2023-05-25 13:35:38.227560: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.235117: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.235158: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.235201: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.235212: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.235219: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.235177: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.235242: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.235227: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.235330: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.235819: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.235361: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.235405: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.235851: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.235428: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.235875: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.235418: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:35:38.235896: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.235387: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.235436: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.235454: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:35:38.236050: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.235914: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.235922: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.235934: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +20: 2023-05-25 13:35:38.235939: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.236080: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.236110: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.236121: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.236129: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.236136: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.236146: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +14: 2023-05-25 13:35:38.236158: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.235790: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.235828: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.235846: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.235868: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.235883: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.235866: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.235917: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.235931: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:35:38.236131: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:35:38.236168: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.236567: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.236603: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.236601: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.236203: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:35:38.236241: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:35:38.236198: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.236631: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.236182: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.236651: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.236669: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.236213: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:35:38.236682: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +28: 2023-05-25 13:35:38.236690: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.236227: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:35:38.236778: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.236816: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.236832: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.236880: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.236873: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.236910: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.236915: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 9: 2023-05-25 13:35:38.236935: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.238989: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.239006: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.239031: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.239055: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.239056: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.239072: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.239048: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.239065: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:35:38.239600: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.239617: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.239631: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.239638: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.239651: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.239657: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.239661: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +30: 2023-05-25 13:35:38.239664: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.239968: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240019: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240033: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240060: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240068: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240105: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240093: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240074: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240459: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.240482: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.240492: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240096: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.240116: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.240118: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240512: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +10: 2023-05-25 13:35:38.240514: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240165: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240529: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240163: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240536: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240182: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:35:38.240545: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240167: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.240185: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:35:38.240681: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240695: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240708: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240717: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240726: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240729: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240738: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +22: 2023-05-25 13:35:38.240741: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.240755: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.240777: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.240803: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.240820: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.240826: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.240806: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.240826: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.240836: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.241349: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.241378: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.241374: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.241384: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.241400: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 6: 2023-05-25 13:35:38.241414: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.240996: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.241424: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.241011: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.241031: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.241030: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:35:38.241431: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.241045: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.241049: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.241070: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.241054: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:35:38.241594: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.241618: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.241627: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.241647: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.241650: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.241654: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.241662: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 5: 2023-05-25 13:35:38.241672: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.241649: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.241678: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.241697: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.241720: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.241714: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.241723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.241727: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.241737: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.242259: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.242271: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.242289: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.242291: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.242305: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.242318: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +11: 2023-05-25 13:35:38.242325: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.241824: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:35:38.242331: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.241864: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.241894: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.241908: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.241931: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.241926: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.241943: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.241942: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:35:38.242439: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.242465: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.242470: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.242481: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.242485: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.242493: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.242495: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +31: 2023-05-25 13:35:38.242499: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.247532: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.247550: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.247558: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.247581: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.247591: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.247584: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.247587: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.247606: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:35:38.248130: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.248149: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.248159: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.248174: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.248181: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.248184: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.248200: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 3: 2023-05-25 13:35:38.248207: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.256349: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.256367: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.256379: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.256385: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.256396: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.256394: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.256403: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.256408: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:35:38.256895: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.256914: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.256926: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.256932: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.256932: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.256941: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.256946: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +17: 2023-05-25 13:35:38.256958: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.258915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.258934: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.258954: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.258955: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.258964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.258977: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.258976: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.258978: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.258992: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.258980: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.259006: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.258986: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:35:38.259009: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259475: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.258992: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259495: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.259014: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259503: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.259017: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:35:38.259515: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259525: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.259488: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259527: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259533: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.259505: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +26: 2023-05-25 13:35:38.259542: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.259517: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.259537: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.259540: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.259545: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.259550: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 2: 2023-05-25 13:35:38.259563: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 4: 2023-05-25 13:35:38.259634: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259651: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259662: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.259661: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259668: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.259684: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259679: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259691: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.259710: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.259667: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259688: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.259694: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.259708: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.259686: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.259716: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.259714: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.259690: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.260179: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.259723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.259719: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.259710: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.260198: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 4: 2023-05-25 13:35:38.260209: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.259736: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.259692: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.259721: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.259759: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.259737: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.260220: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 4: 2023-05-25 13:35:38.260227: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.259756: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:35:38.259726: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.259734: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.259776: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.259760: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.259744: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.259802: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.259752: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.259748: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.259798: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.259766: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.259750: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:35:38.259801: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.259777: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.260233: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.260306: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.259786: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.259780: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.260236: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.260326: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.259812: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.259781: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:35:38.260244: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.260343: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.259819: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.259794: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:35:38.259795: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:35:38.260354: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.260360: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.260366: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.260379: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.260471: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +18: 2023-05-25 13:35:38.260382: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.260489: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.260503: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.260516: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.260524: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.260527: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.260539: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.260559: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.260532: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.260569: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 7: 2023-05-25 13:35:38.260539: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.260579: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.260560: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.260576: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.260586: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.260587: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.260590: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.260595: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +23: 2023-05-25 13:35:38.260597: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.260609: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.260618: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.260620: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.260625: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +25: 2023-05-25 13:35:38.260627: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.259757: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.259745: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:35:38.260460: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260482: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260490: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260499: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260515: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260518: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260526: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. + 0: 2023-05-25 13:35:38.260529: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +19: 2023-05-25 13:36:07.951358: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.951380: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.951387: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.951404: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.951406: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.951418: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.951423: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.951433: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.952325: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.952343: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.952354: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.952366: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.952390: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.952373: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.952382: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.952418: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.952385: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.952435: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.952387: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.952454: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.952479: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.952486: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.952484: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.952511: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.952701: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.952721: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.952742: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.952773: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.952773: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.952783: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.952788: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.952792: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.953142: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.953162: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.953170: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.953186: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.953195: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.953199: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.953202: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.953207: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953388: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953403: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953419: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953425: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953433: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953435: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953445: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.953455: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.953765: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.953785: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.953806: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.953814: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.953827: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.953831: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.953837: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.953838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.954876: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.954894: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.954901: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.954914: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.954929: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.954924: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.954932: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.954942: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955273: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955293: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955328: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955327: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955339: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955337: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955345: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.955356: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.955476: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.955504: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.955528: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.955544: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.955562: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.955570: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.955718: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.955667: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.955720: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.955700: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.955696: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.955712: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.955744: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.955714: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.955757: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.955731: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.955788: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.955726: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.955791: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.955744: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.955808: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.955746: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.955758: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.955764: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956100: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956134: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956149: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956172: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956197: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956205: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956230: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.956229: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.956312: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.956338: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.956359: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.956392: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.956403: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.956399: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.956423: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.956445: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.956548: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.956583: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.956599: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.956609: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.956618: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.956627: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.956634: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.956641: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.976978: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.976983: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.976984: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.976985: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.976994: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.976991: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.977003: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.976987: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.976987: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +26: 2023-05-25 13:36:07.977010: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.977011: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.977012: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.977021: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.977024: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.977021: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +26: 2023-05-25 13:36:07.977024: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.981974: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.982022: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.982012: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.982067: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.982077: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.982084: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.982116: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.982338: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.985007: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.985018: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.985025: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.985025: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.985043: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.985031: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.985034: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.985034: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.985043: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +19: 2023-05-25 13:36:07.985059: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.985063: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.985065: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.985068: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.985070: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.985074: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +19: 2023-05-25 13:36:07.985075: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +21: 2023-05-25 13:36:07.986489: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.986553: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986491: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986498: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986498: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.986550: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986506: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986514: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986667: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.986562: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986514: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986563: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.986562: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986511: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.986562: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.986552: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986515: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986534: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.986568: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.986552: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986535: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +21: 2023-05-25 13:36:07.986536: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +21: 2023-05-25 13:36:07.986549: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986672: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.986568: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.986555: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986549: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986676: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.986573: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.986585: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.986560: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986687: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +21: 2023-05-25 13:36:07.986712: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986673: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986686: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.986574: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.986594: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.986561: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986675: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.986593: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.986599: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.986600: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.986563: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986675: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +13: 2023-05-25 13:36:07.986602: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.986605: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +13: 2023-05-25 13:36:07.986608: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.986565: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.986585: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986673: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.986587: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.986589: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.986593: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986677: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 2: 2023-05-25 13:36:07.986706: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.986586: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.986591: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +31: 2023-05-25 13:36:07.986598: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986705: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986705: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986708: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.986737: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +31: 2023-05-25 13:36:07.986598: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986712: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986713: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 2: 2023-05-25 13:36:07.986712: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.986813: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.986740: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.986816: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.986741: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.986818: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986873: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.986749: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.986821: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.986748: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.986819: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.986750: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.986826: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.986752: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.986826: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986878: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986898: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.986756: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.986771: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.986833: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +27: 2023-05-25 13:36:07.986855: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986899: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986887: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.986773: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.986774: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.986781: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.986856: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.986854: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.986855: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986887: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.986780: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.986783: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +23: 2023-05-25 13:36:07.986786: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.986859: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.986863: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.986860: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986887: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +23: 2023-05-25 13:36:07.986787: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +27: 2023-05-25 13:36:07.986862: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986896: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986897: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986896: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +29: 2023-05-25 13:36:07.986929: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986932: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986932: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986939: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986938: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +29: 2023-05-25 13:36:07.986938: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.987103: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.987137: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.987151: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.987181: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.987185: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.987190: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.987206: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.987245: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.987967: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.987982: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.987985: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.987988: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.987991: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.987992: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.987993: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.987995: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.988009: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.988016: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.988019: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.988021: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.988023: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.988027: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +20: 2023-05-25 13:36:07.988135: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +20: 2023-05-25 13:36:07.988156: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.990282: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.990283: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.990284: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.990286: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.990285: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.990287: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.990287: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.990291: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 9: 2023-05-25 13:36:07.990298: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.990299: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.990299: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.990306: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.990308: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.990305: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.990306: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 9: 2023-05-25 13:36:07.990308: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:07.992645: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.992649: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.992680: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:07.992673: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.992673: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.992681: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:07.992674: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.992690: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.992714: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:07.992717: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:07.992721: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:07.992725: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:07.992763: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.992801: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +14: 2023-05-25 13:36:07.992808: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +14: 2023-05-25 13:36:07.992842: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993001: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993009: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993008: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993008: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993017: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993013: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993013: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993011: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993023: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993016: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +15: 2023-05-25 13:36:07.993029: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993029: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993031: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993032: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993034: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +15: 2023-05-25 13:36:07.993035: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.993654: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.993815: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.993816: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.993825: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.993825: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.993825: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.993838: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.993830: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.993844: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.993830: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.993829: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.993854: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.993853: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.993853: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.993861: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:07.994026: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 4: 2023-05-25 13:36:07.993864: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 4: 2023-05-25 13:36:07.993865: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:07.994029: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.994030: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.994029: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.994036: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.994032: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.994036: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.994035: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +25: 2023-05-25 13:36:07.994066: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:07.994070: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:07.994070: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:07.994074: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:07.994072: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:07.994072: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:07.994078: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +25: 2023-05-25 13:36:07.994078: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.997506: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997525: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997530: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997544: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997552: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997671: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997673: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.997675: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998468: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998468: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998467: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998472: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998472: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998474: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998471: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998478: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998486: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:07.998525: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +22: 2023-05-25 13:36:07.998487: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998488: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998491: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998492: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998492: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998493: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +22: 2023-05-25 13:36:07.998493: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:07.998559: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:07.998572: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:07.998597: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:07.998617: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:07.998626: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:07.998630: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:07.998812: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:07.999970: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:07.999994: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:07.999987: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:07.999994: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:07.999996: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.000002: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.000003: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.000009: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.000006: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +18: 2023-05-25 13:36:08.000031: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.000039: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.000041: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.000044: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.000046: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.000050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +18: 2023-05-25 13:36:08.000049: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:08.000184: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.000219: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.000249: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.000264: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.000287: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.000296: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.000479: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.000500: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.000535: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.000775: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.000548: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.000557: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.000571: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.000801: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.000574: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.000811: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.000582: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.000822: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.000583: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.000835: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.000701: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.000840: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.000846: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.000976: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.001425: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.001486: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.001487: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.001487: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.001490: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.001494: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.001495: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.001503: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:08.001503: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:08.001503: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:08.001508: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:08.001509: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:08.001499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.001499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +28: 2023-05-25 13:36:08.001510: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:08.001522: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +28: 2023-05-25 13:36:08.001523: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:08.001848: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.001850: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.001850: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.001853: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.001853: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.001856: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.001856: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.001858: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +17: 2023-05-25 13:36:08.001865: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:08.001869: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:08.001868: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:08.001869: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:08.001872: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:08.001874: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:08.001876: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +17: 2023-05-25 13:36:08.001876: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:08.002075: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.002091: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.002110: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.002118: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.002128: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.002132: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.002266: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.002267: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.003139: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.003140: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.003141: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.003143: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.003144: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.003147: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.003147: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.003155: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:08.003155: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:08.003162: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:08.003153: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +24: 2023-05-25 13:36:08.003163: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:08.003165: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:08.003164: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:08.003168: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +24: 2023-05-25 13:36:08.003172: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.003382: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.003401: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.003427: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.003427: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.003438: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.003554: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.003556: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.003557: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.003631: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.003645: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.003657: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.003664: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.003672: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.003682: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.003686: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.003794: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004323: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004330: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004328: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004332: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004342: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.004335: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004335: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004346: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.004347: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.004353: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.004353: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.004351: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.004343: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004345: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 6: 2023-05-25 13:36:08.004370: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 6: 2023-05-25 13:36:08.004370: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:08.004585: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.004587: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.004586: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.004588: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.004589: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.004591: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.004592: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.004591: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +30: 2023-05-25 13:36:08.004607: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:08.004608: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:08.004606: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:08.004608: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:08.004606: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:08.004610: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:08.004611: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +30: 2023-05-25 13:36:08.004611: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:08.005582: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.005606: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.005613: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.005628: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.005634: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.005637: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.005647: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.005649: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.006075: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.006091: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.006106: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.006113: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.006122: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.006127: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.006138: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.006144: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.006360: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.006379: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.006387: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.006396: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.006411: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.006414: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.006488: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.006420: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.006423: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.006488: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.006492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.006492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.006499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.006495: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.006497: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.006514: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:08.006514: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:08.006515: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:08.006515: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:08.006516: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:08.006517: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:08.006516: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +10: 2023-05-25 13:36:08.006585: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +10: 2023-05-25 13:36:08.006601: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:08.006741: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.006719: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.006762: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.006784: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.006742: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.006791: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.006737: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.006803: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.006759: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.006806: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.006758: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.007007: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.006771: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.007009: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.006786: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.006884: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.006869: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.007020: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.006885: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.006900: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.007028: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.006900: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.007029: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.006910: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.007031: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.006915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.007031: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.007039: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:08.006921: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.007031: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.006930: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.007033: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.007035: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 8: 2023-05-25 13:36:08.007050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.007050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.007050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.007051: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.007053: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.007055: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 8: 2023-05-25 13:36:08.007055: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:08.007299: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.007301: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.007305: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.007305: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.007308: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.007309: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.007310: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.007310: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +16: 2023-05-25 13:36:08.007319: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:08.007319: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:08.007320: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:08.007324: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:08.007324: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:08.007326: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:08.007327: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +16: 2023-05-25 13:36:08.007327: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.007336: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.007703: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.007359: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.007366: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.007704: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.007370: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.007706: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.007393: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.007704: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.007399: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.007706: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.007406: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.007704: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.007412: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.007753: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.007706: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.007708: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.007722: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:08.007722: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:08.007724: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:08.007726: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:08.007727: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:08.007728: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 3: 2023-05-25 13:36:08.007729: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:08.007760: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 3: 2023-05-25 13:36:08.007729: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:08.007760: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.007769: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:08.007761: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.007761: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.007901: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.007767: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.007771: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.007766: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +12: 2023-05-25 13:36:08.007786: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:08.007787: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:08.007788: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:08.007789: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:08.007791: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:08.007792: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +12: 2023-05-25 13:36:08.007793: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.008453: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.008468: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.008474: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.008473: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.008476: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.008478: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.008478: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.008480: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.008482: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 5: 2023-05-25 13:36:08.008499: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.008499: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.008500: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.008501: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.008501: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.008502: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 5: 2023-05-25 13:36:08.008503: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986563: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986566: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986572: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986566: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986580: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986572: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986575: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 1: 2023-05-25 13:36:07.986605: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986605: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986607: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986608: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986611: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986610: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986614: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 1: 2023-05-25 13:36:07.986615: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.993655: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.993654: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.993655: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.993655: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.993663: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.993659: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.993659: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 0: 2023-05-25 13:36:07.993679: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.993678: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.993678: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.993680: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.993680: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.993681: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.993680: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: 2023-05-25 13:36:07.993681: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:08.001429: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.001448: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:08.001448: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:08.001445: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.001450: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.001454: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.001457: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.001470: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:08.001464: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.001482: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:08.001481: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:08.001484: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:08.001488: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 7: 2023-05-25 13:36:08.001637: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 + 7: 2023-05-25 13:36:08.001663: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:08.007910: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.007916: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:08.007911: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.007914: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.007924: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:08.007916: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.007920: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.007919: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.007936: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:08.007937: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:08.007938: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:08.007927: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps-rocm-5.2.3/aws-ofi-rccl:/opt/rocm/lib64:/opt/rocm/lib:/opt/rocm/rocprofiler/lib:/opt/rocm/rocprofiler/tool:/opt/rocm/roctracer/lib:/opt/rocm/roctracer/tool:/opt/rocm/hip/lib:/opt/cray/pe/python/3.9.13.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64 +11: 2023-05-25 13:36:08.007942: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:08.007943: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +11: 2023-05-25 13:36:08.007950: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. + 0: Loading extension module scaled_upper_triang_masked_softmax_cuda... + 0: Loading extension module scaled_masked_softmax_cuda... + 0: Successfully preprocessed all matching files. + 0: Detected CUDA files, patching ldflags + 0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... + 0: Building extension module fused_mix_prec_layer_norm_cuda... + 0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) + 0: Loading extension module fused_mix_prec_layer_norm_cuda... +12: Successfully preprocessed all matching files. + 0: Successfully preprocessed all matching files. + 0: Successfully preprocessed all matching files. +18: Successfully preprocessed all matching files. +18: Successfully preprocessed all matching files. + 7: Successfully preprocessed all matching files. + 7: Successfully preprocessed all matching files. +11: Successfully preprocessed all matching files. +31: Successfully preprocessed all matching files. +31: Successfully preprocessed all matching files. +31: Successfully preprocessed all matching files. +31: Successfully preprocessed all matching files. +24: Successfully preprocessed all matching files. +24: Successfully preprocessed all matching files. +31: Successfully preprocessed all matching files. + 7: Successfully preprocessed all matching files. +12: Successfully preprocessed all matching files. +14: Successfully preprocessed all matching files. + 8: Successfully preprocessed all matching files. + 8: Successfully preprocessed all matching files. + 6: Successfully preprocessed all matching files. + 8: Successfully preprocessed all matching files. + 8: Successfully preprocessed all matching files. +18: Successfully preprocessed all matching files. +15: Successfully preprocessed all matching files. +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +14: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +14: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +13: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +13: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +11: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +11: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +21: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +21: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( +16: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +16: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +17: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +17: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( +22: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +22: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( +15: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +15: warnings.warn( +10: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +10: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( + 9: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 9: warnings.warn( + 8: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 8: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +18: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +18: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +12: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +12: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +19: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +19: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +23: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +23: warnings.warn( +20: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +20: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( + 1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 1: warnings.warn( +25: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +25: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( + 4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 4: warnings.warn( +28: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +28: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( + 7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 7: warnings.warn( +31: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +31: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( + 5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 5: warnings.warn( + 6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 6: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +30: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +30: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +29: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +29: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( + 3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 3: warnings.warn( +27: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +27: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( +24: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +24: warnings.warn( + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 0: warnings.warn( +26: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +26: warnings.warn( + 2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead + 2: warnings.warn( + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... + 8: Building extension module utils... + 8: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: +16: +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: +17: +17: +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: + 8: Loading extension module utils... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... + 4: Building extension module utils... + 4: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) + 4: Loading extension module utils... +27: Loading extension module utils... +29: Loading extension module utils... +31: Loading extension module utils... +25: Loading extension module utils... +19: Loading extension module utils... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Loading extension module utils... + 0: Loading extension module utils... +12: Loading extension module utils... + 1: Loading extension module utils... +10: Loading extension module utils... + 1: Loading extension module utils... +27: Loading extension module utils... +10: Loading extension module utils... + 1: Loading extension module utils... + 0: Loading extension module utils... + 1: Loading extension module utils... + 2: Loading extension module utils... + 2: Loading extension module utils... +21: Loading extension module utils... +21: Loading extension module utils... + 7: Loading extension module utils... + 7: Loading extension module utils... + 1: Loading extension module utils... + 1: Loading extension module utils... + 7: Loading extension module utils... + 2: Loading extension module utils... + 4: Loading extension module utils... + 7: Loading extension module utils... + 7: Loading extension module utils... + 2: Loading extension module utils... + 4: Loading extension module utils... + 5: Loading extension module utils... + 2: Loading extension module utils... + 7: Loading extension module utils... + 4: Loading extension module utils... + 2: Loading extension module utils... + 5: Loading extension module utils... + 4: Loading extension module utils... + 0: Loading extension module utils... + 5: Loading extension module utils... + 5: Loading extension module utils... + 6: Loading extension module utils... + 6: Loading extension module utils... + 3: Loading extension module utils... + 6: Loading extension module utils... + 3: Loading extension module utils... + 6: Loading extension module utils... + 3: Loading extension module utils... + 6: Loading extension module utils... + 3: Loading extension module utils... + 6: Loading extension module utils... + 2: Loading extension module utils... + 2: Loading extension module utils... +26: Loading extension module utils... +26: Loading extension module utils... + 4: Loading extension module utils... + 4: Loading extension module utils... + 3: Loading extension module utils... + 3: Loading extension module utils... + 4: Loading extension module utils... + 5: Loading extension module utils... + 5: Loading extension module utils... + 6: Loading extension module utils... + 3: Loading extension module utils... + 3: Loading extension module utils... + 6: Loading extension module utils... + 5: Loading extension module utils... + 5: Loading extension module utils... + 7: Loading extension module utils... + 7: Loading extension module utils... + 1: Loading extension module utils... + 1: Loading extension module utils... + 8: Loading extension module utils... + 8: Loading extension module utils... + 0: Loading extension module utils... + 9: Loading extension module utils... + 9: Loading extension module utils... +11: Loading extension module utils... +11: Loading extension module utils... +14: Loading extension module utils... +14: Loading extension module utils... +12: Loading extension module utils... +15: Loading extension module utils... +13: Loading extension module utils... +12: Loading extension module utils... +13: Loading extension module utils... +15: Loading extension module utils... +10: Loading extension module utils... +18: Loading extension module utils... +10: Loading extension module utils... +18: Loading extension module utils... + 0: Loading extension module utils... +24: Loading extension module utils... +24: Loading extension module utils... +17: Loading extension module utils... +17: Loading extension module utils... +17: Loading extension module utils... +17: Loading extension module utils... +17: Loading extension module utils... +17: Loading extension module utils... +16: Loading extension module utils... +16: Loading extension module utils... +16: Loading extension module utils... +16: Loading extension module utils... +16: Loading extension module utils... +16: Loading extension module utils... +26: Loading extension module utils... +26: Loading extension module utils... +28: Loading extension module utils... +28: Loading extension module utils... +24: Loading extension module utils... +31: Loading extension module utils... +27: Loading extension module utils... +26: Loading extension module utils... +29: Loading extension module utils... +28: Loading extension module utils... +24: Loading extension module utils... +27: Loading extension module utils... +31: Loading extension module utils... +19: Loading extension module utils... +25: Loading extension module utils... +30: Loading extension module utils... +29: Loading extension module utils... +27: Loading extension module utils... +25: Loading extension module utils... +26: Loading extension module utils... +30: Loading extension module utils... +28: Loading extension module utils... +31: Loading extension module utils... +25: Loading extension module utils... +30: Loading extension module utils... +25: Loading extension module utils... +29: Loading extension module utils... +25: Loading extension module utils... +30: Loading extension module utils... +16: Loading extension module utils... +20: Loading extension module utils... +20: Loading extension module utils... +16: Loading extension module utils... +20: Loading extension module utils... +22: Loading extension module utils... +18: Loading extension module utils... +23: Loading extension module utils... +21: Loading extension module utils... +19: Loading extension module utils... +22: Loading extension module utils... +18: Loading extension module utils... +21: Loading extension module utils... +20: Loading extension module utils... +19: Loading extension module utils... +23: Loading extension module utils... +17: Loading extension module utils... +17: Loading extension module utils... +31: Loading extension module utils... +31: Loading extension module utils... +29: Loading extension module utils... +29: Loading extension module utils... +21: Loading extension module utils... +21: Loading extension module utils... +21: Loading extension module utils... +21: Loading extension module utils... +18: Loading extension module utils... +18: Loading extension module utils... +20: Loading extension module utils... +20: Loading extension module utils... +22: Loading extension module utils... +22: Loading extension module utils... +23: Loading extension module utils... +23: Loading extension module utils... +19: Loading extension module utils... +19: Loading extension module utils... +22: Loading extension module utils... +22: Loading extension module utils... +23: Loading extension module utils... +23: Loading extension module utils... + 8: Loading extension module utils... +30: Loading extension module utils... +30: Loading extension module utils... + 9: Loading extension module utils... + 8: Loading extension module utils... +28: Loading extension module utils... + 9: Loading extension module utils... +28: Loading extension module utils... +12: Loading extension module utils... +11: Loading extension module utils... +15: Loading extension module utils... +13: Loading extension module utils... +15: Loading extension module utils... +12: Loading extension module utils... +11: Loading extension module utils... +13: Loading extension module utils... +14: Loading extension module utils... +14: Loading extension module utils... +24: Loading extension module utils... + 8: Loading extension module utils... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... + 8: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 8: + 8: Loading extension module utils...Loading extension module utils... + 8: +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: +31: No modifications detected for re-loaded extension module utils, skipping build step... +31: Loading extension module utils... +31: No modifications detected for re-loaded extension module utils, skipping build step... +31: Loading extension module utils... +31: No modifications detected for re-loaded extension module utils, skipping build step... +31: Loading extension module utils... +31: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +31: +31: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils...Loading extension module utils... +31: +31: +31: Loading extension module utils... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... + 4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 4: + 4: + 4: + 4: Loading extension module utils...Loading extension module utils...Loading extension module utils...Loading extension module utils... + 4: + 4: + 4: +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... + 4: No modifications detected for re-loaded extension module utils, skipping build step... + 4: Loading extension module utils... +19: No modifications detected for re-loaded extension module utils, skipping build step... +19: Loading extension module utils... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: No modifications detected for re-loaded extension module utils, skipping build step... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: No modifications detected for re-loaded extension module utils, skipping build step... +27: Loading extension module utils... +19: Loading extension module utils... +25: No modifications detected for re-loaded extension module utils, skipping build step... +25: Loading extension module utils... +19: No modifications detected for re-loaded extension module utils, skipping build step... +19: Loading extension module utils... +27: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +27: +27: Loading extension module utils...Loading extension module utils... +27: +25: No modifications detected for re-loaded extension module utils, skipping build step... +25: Loading extension module utils... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +29: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +29: +29: Loading extension module utils...Loading extension module utils... +29: +27: No modifications detected for re-loaded extension module utils, skipping build step... +27: Loading extension module utils... +25: No modifications detected for re-loaded extension module utils, skipping build step... +25: Loading extension module utils... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +27: +27: Loading extension module utils... +27: Loading extension module utils... +25: No modifications detected for re-loaded extension module utils, skipping build step... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Loading extension module utils... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +19: No modifications detected for re-loaded extension module utils, skipping build step... +19: Loading extension module utils... +25: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +25: +25: Loading extension module utils...Loading extension module utils... +25: +19: No modifications detected for re-loaded extension module utils, skipping build step... +19: Loading extension module utils... +19: No modifications detected for re-loaded extension module utils, skipping build step... +19: Loading extension module utils... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: +11: +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... +26: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +26: +26: Loading extension module utils... +26: Loading extension module utils... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... +14: No modifications detected for re-loaded extension module utils, skipping build step... +14: Loading extension module utils... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... +14: No modifications detected for re-loaded extension module utils, skipping build step... +14: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +14: +14: Loading extension module utils... +22: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +22: +22: Loading extension module utils...Loading extension module utils... +22: +14: No modifications detected for re-loaded extension module utils, skipping build step... +14: Loading extension module utils... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: + 7: + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 7: + 7: No modifications detected for re-loaded extension module utils, skipping build step... + 7: Loading extension module utils... + 7: No modifications detected for re-loaded extension module utils, skipping build step... + 7: Loading extension module utils... + 7: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 7: + 7: Loading extension module utils...Loading extension module utils... + 7: + 7: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 7: + 7: + 7: Loading extension module utils...Loading extension module utils... + 7: Loading extension module utils... + 7: + 7: No modifications detected for re-loaded extension module utils, skipping build step... + 7: Loading extension module utils... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: + 1: + 1: No modifications detected for re-loaded extension module utils, skipping build step... + 1: Loading extension module utils... + 1: No modifications detected for re-loaded extension module utils, skipping build step... + 1: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... + 1: + 1: Loading extension module utils... + 1: No modifications detected for re-loaded extension module utils, skipping build step... + 1: Loading extension module utils... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 1: + 1: Loading extension module utils...Loading extension module utils... + 1: + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 1: No modifications detected for re-loaded extension module utils, skipping build step... + 1: Loading extension module utils... + 1: No modifications detected for re-loaded extension module utils, skipping build step... + 1: Loading extension module utils... + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: + 5: + 5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... + 5: + 5: Loading extension module utils... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... + 5: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 5: + 5: Loading extension module utils... + 5: Loading extension module utils... + 5: No modifications detected for re-loaded extension module utils, skipping build step... + 5: Loading extension module utils... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: No modifications detected for re-loaded extension module utils, skipping build step... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Loading extension module utils... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +16: +15: No modifications detected for re-loaded extension module utils, skipping build step... +15: Loading extension module utils... +15: No modifications detected for re-loaded extension module utils, skipping build step... +15: Loading extension module utils... +16: No modifications detected for re-loaded extension module utils, skipping build step... +16: Loading extension module utils... +16: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +16: +16: +16: Loading extension module utils...Loading extension module utils... +16: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +16: +16: +16: Loading extension module utils... +16: No modifications detected for re-loaded extension module utils, skipping build step... +16: Loading extension module utils... +15: No modifications detected for re-loaded extension module utils, skipping build step... +15: Loading extension module utils... +16: No modifications detected for re-loaded extension module utils, skipping build step... +16: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +16: +16: Loading extension module utils... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +18: +18: Loading extension module utils... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: +17: +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: +17: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +17: +17: Loading extension module utils... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +17: No modifications detected for re-loaded extension module utils, skipping build step... +17: Loading extension module utils... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: Loading extension module utils... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +28: +28: Loading extension module utils... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: Loading extension module utils... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: Loading extension module utils... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: Loading extension module utils... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: +21: +21: +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... +21: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +21: No modifications detected for re-loaded extension module utils, skipping build step... +21: Loading extension module utils... +21: Loading extension module utils... +21: +21: Loading extension module utils... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: + 9: +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: No modifications detected for re-loaded extension module utils, skipping build step... +12: Loading extension module utils... +12: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +12: +12: Loading extension module utils... +12: Loading extension module utils... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: No modifications detected for re-loaded extension module utils, skipping build step... + 9: Loading extension module utils... +12: No modifications detected for re-loaded extension module utils, skipping build step... +12: Loading extension module utils... +12: No modifications detected for re-loaded extension module utils, skipping build step... +12: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... + 9: No modifications detected for re-loaded extension module utils, skipping build step... + 9: Loading extension module utils... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: No modifications detected for re-loaded extension module utils, skipping build step... + 9: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... + 9: + 9: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: No modifications detected for re-loaded extension module utils, skipping build step... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +30: Loading extension module utils... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +10: No modifications detected for re-loaded extension module utils, skipping build step... +10: Loading extension module utils... +10: No modifications detected for re-loaded extension module utils, skipping build step... +10: Loading extension module utils... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: No modifications detected for re-loaded extension module utils, skipping build step... +10: Loading extension module utils... +10: No modifications detected for re-loaded extension module utils, skipping build step... +10: Loading extension module utils... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: + 3: + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: No modifications detected for re-loaded extension module utils, skipping build step... +23: Loading extension module utils... +23: No modifications detected for re-loaded extension module utils, skipping build step... +23: Loading extension module utils... +23: No modifications detected for re-loaded extension module utils, skipping build step... +23: Loading extension module utils... +23: No modifications detected for re-loaded extension module utils, skipping build step... +23: Loading extension module utils... +23: No modifications detected for re-loaded extension module utils, skipping build step... +23: Loading extension module utils... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... + 3: + 3: Loading extension module utils... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... +23: No modifications detected for re-loaded extension module utils, skipping build step... +23: Loading extension module utils... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... + 3: No modifications detected for re-loaded extension module utils, skipping build step... + 3: Loading extension module utils... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +24: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +24: +24: Loading extension module utils... +24: Loading extension module utils... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 2: No modifications detected for re-loaded extension module utils, skipping build step... + 2: Loading extension module utils... + 2: No modifications detected for re-loaded extension module utils, skipping build step... + 2: Loading extension module utils... + 2: No modifications detected for re-loaded extension module utils, skipping build step... + 6: No modifications detected for re-loaded extension module utils, skipping build step... + 6: Loading extension module utils... + 2: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... + 2: + 2: Loading extension module utils... + 6: No modifications detected for re-loaded extension module utils, skipping build step... + 6: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... + 6: + 6: Loading extension module utils... + 6: No modifications detected for re-loaded extension module utils, skipping build step... + 6: Loading extension module utils... + 6: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 6: + 6: Loading extension module utils...Loading extension module utils... + 6: + 2: No modifications detected for re-loaded extension module utils, skipping build step... + 2: Loading extension module utils... + 2: No modifications detected for re-loaded extension module utils, skipping build step... + 2: Loading extension module utils... + 6: No modifications detected for re-loaded extension module utils, skipping build step... + 6: Loading extension module utils... + 6: No modifications detected for re-loaded extension module utils, skipping build step... + 6: Loading extension module utils... + 2: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 2: + 2: Loading extension module utils...Loading extension module utils... + 2: +24: Loading extension module utils... +24: Loading extension module utils... +24: Loading extension module utils... +30: Loading extension module utils... + 9: Loading extension module utils... + 9: Loading extension module utils... +13: Loading extension module utils... +31: Loading extension module utils... + 8: Loading extension module utils... +25: Loading extension module utils... +12: Loading extension module utils... +10: Loading extension module utils... +27: Loading extension module utils... +11: Loading extension module utils... +14: Loading extension module utils... +26: Loading extension module utils... +15: Loading extension module utils... +30: Loading extension module utils... +28: Loading extension module utils... +20: Loading extension module utils... +20: Loading extension module utils... +29: Loading extension module utils... +18: Loading extension module utils... + 9: Loading extension module utils... + 0: Loading extension module utils... +19: Loading extension module utils... +13: Loading extension module utils... +25: Loading extension module utils... +12: Loading extension module utils... +15: Loading extension module utils... +31: Loading extension module utils... +23: Loading extension module utils... +27: Loading extension module utils... + 9: Loading extension module utils... +11: Loading extension module utils... +14: Loading extension module utils... +26: Loading extension module utils... +28: Loading extension module utils... + 0: Loading extension module utils... +29: Loading extension module utils... +18: Loading extension module utils... +22: Loading extension module utils... +15: Loading extension module utils... +19: Loading extension module utils... +13: Loading extension module utils... + 8: Loading extension module utils... +10: Loading extension module utils... +12: Loading extension module utils... +23: Loading extension module utils... +11: Loading extension module utils... +14: Loading extension module utils... + 0: Loading extension module utils... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +24: No modifications detected for re-loaded extension module utils, skipping build step... +24: Loading extension module utils... +22: Loading extension module utils... +15: Loading extension module utils... +13: Loading extension module utils... +11: Loading extension module utils... +14: Loading extension module utils... +10: Loading extension module utils... +10: Loading extension module utils... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... + 8: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 8: No modifications detected for re-loaded extension module utils, skipping build step... + 8: Loading extension module utils... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: No modifications detected for re-loaded extension module utils, skipping build step... + 9: Loading extension module utils... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 9: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 9: + 9: Loading extension module utils...Loading extension module utils... + 9: + 9: No modifications detected for re-loaded extension module utils, skipping build step... + 9: Loading extension module utils... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +10: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +30: No modifications detected for re-loaded extension module utils, skipping build step... +30: Loading extension module utils... +10: No modifications detected for re-loaded extension module utils, skipping build step... +10: Loading extension module utils... +10: No modifications detected for re-loaded extension module utils, skipping build step... +10: Loading extension module utils... +10: No modifications detected for re-loaded extension module utils, skipping build step... +10: Loading extension module utils... +10: No modifications detected for re-loaded extension module utils, skipping build step... +10: Loading extension module utils... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +15: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +15: +15: Loading extension module utils...Loading extension module utils... +15: +15: No modifications detected for re-loaded extension module utils, skipping build step... +15: Loading extension module utils... +15: No modifications detected for re-loaded extension module utils, skipping build step... +15: Loading extension module utils... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +20: No modifications detected for re-loaded extension module utils, skipping build step... +20: Loading extension module utils... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... +13: No modifications detected for re-loaded extension module utils, skipping build step... +13: Loading extension module utils... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... + 0: + 0: + 0: Loading extension module utils...Loading extension module utils...Loading extension module utils... + 0: + 0: +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: No modifications detected for re-loaded extension module utils, skipping build step... +23: Loading extension module utils... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: No modifications detected for re-loaded extension module utils, skipping build step... +12: Loading extension module utils... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: No modifications detected for re-loaded extension module utils, skipping build step... +12: Loading extension module utils... +31: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: No modifications detected for re-loaded extension module utils, skipping build step... +27: Loading extension module utils... +23: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +23: No modifications detected for re-loaded extension module utils, skipping build step... +23: Loading extension module utils... +27: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +27: No modifications detected for re-loaded extension module utils, skipping build step... +27: Loading extension module utils... +31: No modifications detected for re-loaded extension module utils, skipping build step... +31: Loading extension module utils... +31: No modifications detected for re-loaded extension module utils, skipping build step... +31: Loading extension module utils... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: Loading extension module utils... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +25: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +28: No modifications detected for re-loaded extension module utils, skipping build step... +28: Loading extension module utils... +19: No modifications detected for re-loaded extension module utils, skipping build step... +19: Loading extension module utils... +25: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +25: +25: Loading extension module utils...Loading extension module utils... +25: +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +18: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +19: No modifications detected for re-loaded extension module utils, skipping build step... +19: Loading extension module utils... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +12: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +12: No modifications detected for re-loaded extension module utils, skipping build step... +12: Loading extension module utils... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +18: No modifications detected for re-loaded extension module utils, skipping build step... +18: Loading extension module utils... +14: No modifications detected for re-loaded extension module utils, skipping build step... +14: Loading extension module utils... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +29: No modifications detected for re-loaded extension module utils, skipping build step... +29: Loading extension module utils... +26: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +26: No modifications detected for re-loaded extension module utils, skipping build step... +26: Loading extension module utils... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: No modifications detected for re-loaded extension module utils, skipping build step... +14: Loading extension module utils... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: No modifications detected for re-loaded extension module utils, skipping build step... +14: Loading extension module utils... +11: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +11: No modifications detected for re-loaded extension module utils, skipping build step... +11: Loading extension module utils... +22: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +22: No modifications detected for re-loaded extension module utils, skipping build step... +22: Loading extension module utils... +14: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +14: No modifications detected for re-loaded extension module utils, skipping build step... +14: Loading extension module utils... + 0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... + 0: No modifications detected for re-loaded extension module utils, skipping build step... + 0: Loading extension module utils... + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings + 0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/8b7178b4b/3583607.out b/8b7178b4b/3583607.out new file mode 100644 index 0000000000000000000000000000000000000000..2d2e130a8d536c79a0cdf5aa48a4937f69958853 --- /dev/null +++ b/8b7178b4b/3583607.out @@ -0,0 +1,14912 @@ +Model parameters: d_model 4096 ffw_size 16384 kv_size 128 n_heads 32 n_layers 42 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 4 --pipeline-model-parallel-size 4 --num-layers 42 --hidden-size 4096 --num-attention-heads 32 --kv-channels 128 --ffn-hidden-size 16384 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 1 --global-batch-size 512 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --clip-grad 1.0 --kill-switch-path kill-switch-8b7178b4bval --bf16 --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 --override-lr-scheduler --reset-progress --no-load-optim --log-interval 10 --save-interval 5000 --eval-interval 1 --eval-iters 100 --eval-only true --tensorboard-dir tensorboard_8b7178b4bval --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save lm1-8b7-178b-c4-repetitions/8b7178b4b --load lm1-8b7-178b-c4-repetitions/8b7178b4b --train-weighted-split-paths-path train400m.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --num-workers 0 --valid-num-workers 0 --deepspeed --deepspeed_config ds_configs/3583607.json --zero-stage 0 +START 3583607: Thu 25 May 2023 01:34:29 PM EEST + 0: + 0: + 0: ======================= ROCm System Management Interface ======================= + 0: ================================= Concise Info ================================= + 0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 0: 0 46.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: 2 45.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: 4 44.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: 6 45.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: ================================================================================ + 0: ============================= End of ROCm SMI Log ============================== +19: +19: +19: ======================= ROCm System Management Interface ======================= +19: ================================= Concise Info ================================= +19: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +19: 0 41.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 1 52.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: 2 37.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: 4 45.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 5 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: 6 40.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: ================================================================================ +19: ============================= End of ROCm SMI Log ============================== +22: +22: +22: ======================= ROCm System Management Interface ======================= +22: ================================= Concise Info ================================= +22: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +22: 0 46.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 1 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: 2 47.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: 4 42.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: 6 39.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: ================================================================================ +22: ============================= End of ROCm SMI Log ============================== +13: +13: +13: ======================= ROCm System Management Interface ======================= +13: ================================= Concise Info ================================= +13: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +13: 0 45.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: 2 43.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: 4 42.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: 6 43.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: ================================================================================ +13: ============================= End of ROCm SMI Log ============================== + 9: + 9: + 9: ======================= ROCm System Management Interface ======================= + 9: ================================= Concise Info ================================= + 9: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 9: 0 46.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 1 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: 2 42.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: 4 42.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: 6 42.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: ================================================================================ + 9: ============================= End of ROCm SMI Log ============================== +30: +30: +30: ======================= ROCm System Management Interface ======================= +30: ================================= Concise Info ================================= +30: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +30: 0 43.0c 98.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: 2 41.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 3 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: 4 40.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: 6 43.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: ================================================================================ +30: ============================= End of ROCm SMI Log ============================== +16: +16: +16: ======================= ROCm System Management Interface ======================= +16: ================================= Concise Info ================================= +16: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +16: 0 42.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: 2 41.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 3 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: 4 43.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: 6 43.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: ================================================================================ +16: ============================= End of ROCm SMI Log ============================== +17: +17: +17: ======================= ROCm System Management Interface ======================= +17: ================================= Concise Info ================================= +17: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +17: 0 43.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: 2 38.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: 4 46.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: 6 38.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: ================================================================================ +17: ============================= End of ROCm SMI Log ============================== +10: +10: +10: ======================= ROCm System Management Interface ======================= +10: ================================= Concise Info ================================= +10: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +10: 0 46.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 1 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: 2 43.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: 4 39.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: 6 43.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: ================================================================================ +10: ============================= End of ROCm SMI Log ============================== +26: +26: +26: ======================= ROCm System Management Interface ======================= +26: ================================= Concise Info ================================= +26: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +26: 0 45.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: 2 42.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: 4 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: 6 38.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: ================================================================================ +26: ============================= End of ROCm SMI Log ============================== +24: +24: +24: ======================= ROCm System Management Interface ======================= +24: ================================= Concise Info ================================= +24: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +24: 0 46.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: 2 47.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: 4 41.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: 6 40.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 7 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: ================================================================================ +24: ============================= End of ROCm SMI Log ============================== +27: +27: +27: ======================= ROCm System Management Interface ======================= +27: ================================= Concise Info ================================= +27: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +27: 0 42.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: 2 38.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: 4 48.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: 6 42.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: ================================================================================ +27: ============================= End of ROCm SMI Log ============================== + 7: + 7: + 7: ======================= ROCm System Management Interface ======================= + 7: ================================= Concise Info ================================= + 7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 7: 0 46.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: 2 45.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 3 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: 4 41.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 5 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: 6 35.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 7 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: ================================================================================ + 7: ============================= End of ROCm SMI Log ============================== + 1: + 1: + 1: ======================= ROCm System Management Interface ======================= + 1: ================================= Concise Info ================================= + 1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 1: 0 44.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 1 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: 2 37.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: 4 40.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: 6 36.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: ================================================================================ + 1: ============================= End of ROCm SMI Log ============================== +14: +14: +14: ======================= ROCm System Management Interface ======================= +14: ================================= Concise Info ================================= +14: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +14: 0 46.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 1 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: 2 46.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: 4 48.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: 6 45.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 7 52.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: ================================================================================ +14: ============================= End of ROCm SMI Log ============================== +12: +12: +12: ======================= ROCm System Management Interface ======================= +12: ================================= Concise Info ================================= +12: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +12: 0 46.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: 2 36.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: 4 43.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: 6 41.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: ================================================================================ +12: ============================= End of ROCm SMI Log ============================== +31: +31: +31: ======================= ROCm System Management Interface ======================= +31: ================================= Concise Info ================================= +31: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +31: 0 46.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: 2 37.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 3 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: 4 44.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: 6 43.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: ================================================================================ +31: ============================= End of ROCm SMI Log ============================== +28: +28: +28: ======================= ROCm System Management Interface ======================= +28: ================================= Concise Info ================================= +28: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +28: 0 47.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: 2 39.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 3 38.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: 4 42.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: 6 39.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: ================================================================================ +28: ============================= End of ROCm SMI Log ============================== +20: +20: +20: ======================= ROCm System Management Interface ======================= +20: ================================= Concise Info ================================= +20: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +20: 0 48.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: 2 41.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: 4 46.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: 6 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: ================================================================================ +20: ============================= End of ROCm SMI Log ============================== + 5: + 5: + 5: ======================= ROCm System Management Interface ======================= + 5: ================================= Concise Info ================================= + 5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 5: 0 38.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 1 51.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: 2 42.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: 4 40.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: 6 39.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 7 38.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: ================================================================================ + 5: ============================= End of ROCm SMI Log ============================== +23: +23: +23: ======================= ROCm System Management Interface ======================= +23: ================================= Concise Info ================================= +23: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +23: 0 46.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: 2 40.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: 4 41.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: 6 37.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: ================================================================================ +23: ============================= End of ROCm SMI Log ============================== +25: +25: +25: ======================= ROCm System Management Interface ======================= +25: ================================= Concise Info ================================= +25: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +25: 0 40.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 1 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: 2 44.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: 4 44.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: 6 34.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: ================================================================================ +25: ============================= End of ROCm SMI Log ============================== +29: +29: +29: ======================= ROCm System Management Interface ======================= +29: ================================= Concise Info ================================= +29: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +29: 0 41.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: 2 38.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 3 38.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: 4 41.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 5 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: 6 41.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: ================================================================================ +29: ============================= End of ROCm SMI Log ============================== + 3: + 3: + 3: ======================= ROCm System Management Interface ======================= + 3: ================================= Concise Info ================================= + 3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 3: 0 50.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: 2 37.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: 4 39.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 5 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: 6 39.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: ================================================================================ + 3: ============================= End of ROCm SMI Log ============================== +21: +21: +21: ======================= ROCm System Management Interface ======================= +21: ================================= Concise Info ================================= +21: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +21: 0 44.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: 2 41.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: 4 41.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: 6 47.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: ================================================================================ +21: ============================= End of ROCm SMI Log ============================== +15: +15: +15: ======================= ROCm System Management Interface ======================= +15: ================================= Concise Info ================================= +15: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +15: 0 47.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: 2 40.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: 4 39.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 5 51.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: 6 45.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: ================================================================================ +15: ============================= End of ROCm SMI Log ============================== +11: +11: +11: ======================= ROCm System Management Interface ======================= +11: ================================= Concise Info ================================= +11: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +11: 0 45.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: 2 40.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: 4 44.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: 6 37.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: ================================================================================ +11: ============================= End of ROCm SMI Log ============================== +18: +18: +18: ======================= ROCm System Management Interface ======================= +18: ================================= Concise Info ================================= +18: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +18: 0 48.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 1 54.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: 2 38.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: 4 40.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 5 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: 6 38.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: ================================================================================ +18: ============================= End of ROCm SMI Log ============================== + 6: + 6: + 6: ======================= ROCm System Management Interface ======================= + 6: ================================= Concise Info ================================= + 6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 6: 0 48.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: 2 39.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: 4 41.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 5 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: 6 43.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: ================================================================================ + 6: ============================= End of ROCm SMI Log ============================== + 2: + 2: + 2: ======================= ROCm System Management Interface ======================= + 2: ================================= Concise Info ================================= + 2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 2: 0 44.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: 2 45.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: 4 40.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: 6 37.0c 98.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: ================================================================================ + 2: ============================= End of ROCm SMI Log ============================== + 4: + 4: + 4: ======================= ROCm System Management Interface ======================= + 4: ================================= Concise Info ================================= + 4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 4: 0 44.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: 2 40.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: 4 50.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: 6 40.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 7 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: ================================================================================ + 4: ============================= End of ROCm SMI Log ============================== + 8: + 8: + 8: ======================= ROCm System Management Interface ======================= + 8: ================================= Concise Info ================================= + 8: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 8: 0 43.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: 2 40.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: 4 43.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: 6 38.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 7 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: ================================================================================ + 8: ============================= End of ROCm SMI Log ============================== +27: Launching on nid006132 (27/32), master nid006105 port 9999, GPUs 8, CUDA: True +20: Launching on nid006125 (20/32), master nid006105 port 9999, GPUs 8, CUDA: True +17: Launching on nid006122 (17/32), master nid006105 port 9999, GPUs 8, CUDA: True +30: Launching on nid006135 (30/32), master nid006105 port 9999, GPUs 8, CUDA: True +13: Launching on nid006118 (13/32), master nid006105 port 9999, GPUs 8, CUDA: True +19: Launching on nid006124 (19/32), master nid006105 port 9999, GPUs 8, CUDA: True +24: Launching on nid006129 (24/32), master nid006105 port 9999, GPUs 8, CUDA: True +14: Launching on nid006119 (14/32), master nid006105 port 9999, GPUs 8, CUDA: True +22: Launching on nid006127 (22/32), master nid006105 port 9999, GPUs 8, CUDA: True +10: Launching on nid006115 (10/32), master nid006105 port 9999, GPUs 8, CUDA: True +23: Launching on nid006128 (23/32), master nid006105 port 9999, GPUs 8, CUDA: True + 9: Launching on nid006114 (9/32), master nid006105 port 9999, GPUs 8, CUDA: True +28: Launching on nid006133 (28/32), master nid006105 port 9999, GPUs 8, CUDA: True + 5: Launching on nid006110 (5/32), master nid006105 port 9999, GPUs 8, CUDA: True + 1: Launching on nid006106 (1/32), master nid006105 port 9999, GPUs 8, CUDA: True +21: Launching on nid006126 (21/32), master nid006105 port 9999, GPUs 8, CUDA: True +12: Launching on nid006117 (12/32), master nid006105 port 9999, GPUs 8, CUDA: True + 7: Launching on nid006112 (7/32), master nid006105 port 9999, GPUs 8, CUDA: True +31: Launching on nid006136 (31/32), master nid006105 port 9999, GPUs 8, CUDA: True +25: Launching on nid006130 (25/32), master nid006105 port 9999, GPUs 8, CUDA: True +16: Launching on nid006121 (16/32), master nid006105 port 9999, GPUs 8, CUDA: True +26: Launching on nid006131 (26/32), master nid006105 port 9999, GPUs 8, CUDA: True + 3: Launching on nid006108 (3/32), master nid006105 port 9999, GPUs 8, CUDA: True + 2: Launching on nid006107 (2/32), master nid006105 port 9999, GPUs 8, CUDA: True + 4: Launching on nid006109 (4/32), master nid006105 port 9999, GPUs 8, CUDA: True + 6: Launching on nid006111 (6/32), master nid006105 port 9999, GPUs 8, CUDA: True + 8: Launching on nid006113 (8/32), master nid006105 port 9999, GPUs 8, CUDA: True +18: Launching on nid006123 (18/32), master nid006105 port 9999, GPUs 8, CUDA: True +11: Launching on nid006116 (11/32), master nid006105 port 9999, GPUs 8, CUDA: True + 0: Launching on nid006105 (0/32), master nid006105 port 9999, GPUs 8, CUDA: True +15: Launching on nid006120 (15/32), master nid006105 port 9999, GPUs 8, CUDA: True +29: Launching on nid006134 (29/32), master nid006105 port 9999, GPUs 8, CUDA: True + 0: using world size: 256, data-parallel-size: 16, tensor-model-parallel size: 4, pipeline-model-parallel size: 4 + 0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. + 0: using torch.bfloat16 for parameters ... + 0: ------------------------ arguments ------------------------ + 0: abort_on_unmet_fused_kernel_constraints ......... False + 0: accumulate_allreduce_grads_in_fp32 .............. True + 0: adam_beta1 ...................................... 0.9 + 0: adam_beta2 ...................................... 0.999 + 0: adam_eps ........................................ 1e-08 + 0: adlr_autoresume ................................. False + 0: adlr_autoresume_interval ........................ 1000 + 0: apply_query_key_layer_scaling ................... True + 0: apply_residual_connection_post_layernorm ........ False + 0: attention_dropout ............................... 0.1 + 0: attention_softmax_in_fp32 ....................... False + 0: bert_binary_head ................................ True + 0: bert_load ....................................... None + 0: bf16 ............................................ True + 0: bias_dropout_fusion ............................. True + 0: bias_gelu_fusion ................................ True + 0: biencoder_projection_dim ........................ 0 + 0: biencoder_shared_query_context_model ............ False + 0: block_data_path ................................. None + 0: checkpoint_activations .......................... False + 0: checkpoint_in_cpu ............................... False + 0: checkpoint_num_layers ........................... 1 + 0: clip_grad ....................................... 1.0 + 0: codecarbon_dir .................................. None + 0: consumed_train_samples .......................... 0 + 0: consumed_train_tokens ........................... 0 + 0: consumed_valid_samples .......................... 0 + 0: contigious_checkpointing ........................ False + 0: cpu_optimizer ................................... False + 0: cpu_torch_adam .................................. False + 0: curriculum_learning ............................. False + 0: data_impl ....................................... mmap + 0: data_parallel_size .............................. 16 + 0: data_path ....................................... None + 0: dataloader_type ................................. single + 0: DDP_impl ........................................ local + 0: decoder_seq_length .............................. None + 0: deepscale ....................................... False + 0: deepscale_config ................................ None + 0: deepspeed ....................................... True + 0: deepspeed_activation_checkpointing .............. False + 0: deepspeed_config ................................ ds_configs/3583607.json + 0: deepspeed_mpi ................................... False + 0: distribute_checkpointed_activations ............. False + 0: distributed_backend ............................. nccl + 0: embed_layernorm ................................. False + 0: embedding_path .................................. None + 0: encoder_seq_length .............................. 2048 + 0: eod_mask_loss ................................... False + 0: eval_interval ................................... 1 + 0: eval_iters ...................................... 100 + 0: eval_only ....................................... True + 0: evidence_data_path .............................. None + 0: exit_duration_in_mins ........................... None + 0: exit_interval ................................... None + 0: ffn_hidden_size ................................. 16384 + 0: finetune ........................................ False + 0: fp16 ............................................ False + 0: fp16_lm_cross_entropy ........................... False + 0: fp32_residual_connection ........................ False + 0: gigaflos_no_embeds .............................. 0 + 0: global_batch_size ............................... 512 + 0: glu_activation .................................. None + 0: hidden_dropout .................................. 0.1 + 0: hidden_size ..................................... 4096 + 0: hysteresis ...................................... 2 + 0: ict_head_size ................................... None + 0: ict_load ........................................ None + 0: img_dim ......................................... 224 + 0: indexer_batch_size .............................. 128 + 0: indexer_log_interval ............................ 1000 + 0: inference ....................................... False + 0: init_method_std ................................. 0.02 + 0: init_method_xavier_uniform ...................... False + 0: initial_loss_scale .............................. 4294967296 + 0: kill_switch_path ................................ kill-switch-8b7178b4bval + 0: kv_channels ..................................... 128 + 0: layer_norm_fusion ............................... True + 0: layernorm_epsilon ............................... 1e-05 + 0: lazy_mpu_init ................................... None + 0: load ............................................ lm1-8b7-178b-c4-repetitions/8b7178b4b + 0: local_rank ...................................... None + 0: log_batch_size_to_tensorboard ................... True + 0: log_interval .................................... 10 + 0: log_learning_rate_to_tensorboard ................ True + 0: log_level ....................................... None + 0: log_level_replica ............................... None + 0: log_loss_scale_to_tensorboard ................... True + 0: log_num_zeros_in_grad ........................... False + 0: log_params_norm ................................. False + 0: log_path ........................................ None + 0: log_timers_to_tensorboard ....................... True + 0: log_validation_ppl_to_tensorboard ............... True + 0: loss_on_targets_only ............................ False + 0: loss_scale ...................................... None + 0: loss_scale_window ............................... 1000 + 0: lr .............................................. 0.0002 + 0: lr_decay_iters .................................. None + 0: lr_decay_samples ................................ 1 + 0: lr_decay_style .................................. cosine + 0: lr_decay_tokens ................................. None + 0: lr_warmup_fraction .............................. None + 0: lr_warmup_iters ................................. 0 + 0: lr_warmup_samples ............................... 0 + 0: make_vocab_size_divisible_by .................... 128 + 0: mask_prob ....................................... 0.15 + 0: masked_softmax_fusion ........................... True + 0: max_position_embeddings ......................... 2048 + 0: mean_noise_span_length .......................... None + 0: memory_centric_tiled_linear ..................... False + 0: merge_file ...................................... gpt2/merges.txt + 0: micro_batch_size ................................ 1 + 0: min_loss_scale .................................. 1.0 + 0: min_lr .......................................... 2e-05 + 0: mmap_warmup ..................................... False + 0: no_load_optim ................................... True + 0: no_load_rng ..................................... None + 0: no_save_optim ................................... None + 0: no_save_rng ..................................... None + 0: noise_density ................................... None + 0: num_attention_heads ............................. 32 + 0: num_channels .................................... 3 + 0: num_classes ..................................... 1000 + 0: num_layers ...................................... 42 + 0: num_layers_per_virtual_pipeline_stage ........... None + 0: num_workers ..................................... 0 + 0: onnx_safe ....................................... None + 0: openai_gelu ..................................... False + 0: optimizer ....................................... adam + 0: optimizer_fusion ................................ True + 0: override_lr_scheduler ........................... True + 0: pad_vocab_size_to ............................... None + 0: params_dtype .................................... torch.bfloat16 + 0: partition_activations ........................... False + 0: patch_dim ....................................... 16 + 0: pipeline_model_parallel_size .................... 4 + 0: position_embedding_type ......................... PositionEmbeddingType.absolute + 0: pp_partition_method ............................. None + 0: profile_backward ................................ False + 0: query_in_block_prob ............................. 0.1 + 0: rampup_batch_size ............................... None + 0: rank ............................................ 0 + 0: remote_device ................................... none + 0: reset_attention_mask ............................ False + 0: reset_position_ids .............................. False + 0: reset_progress .................................. True + 0: retriever_report_topk_accuracies ................ [] + 0: retriever_score_scaling ......................... False + 0: retriever_seq_length ............................ 256 + 0: reweight_loss_based_on_position_frequency ....... False + 0: sample_rate ..................................... 1.0 + 0: save ............................................ lm1-8b7-178b-c4-repetitions/8b7178b4b + 0: save_interval ................................... 5000 + 0: scatter_gather_tensors_in_pipeline .............. True + 0: scattered_embeddings ............................ False + 0: seed ............................................ 1234 + 0: seq_length ...................................... 2048 + 0: sgd_momentum .................................... 0.9 + 0: short_seq_prob .................................. 0.1 + 0: skip_train_iteration_range ...................... None + 0: split ........................................... None + 0: split_transformers .............................. False + 0: sync_tp_duplicated_parameters ................... False + 0: synchronize_each_layer .......................... False + 0: tensor_model_parallel_size ...................... 4 + 0: tensorboard_dir ................................. tensorboard_8b7178b4bval + 0: tensorboard_log_interval ........................ 1 + 0: tensorboard_queue_size .......................... 5 + 0: test_weighted_split_paths ....................... None + 0: test_weighted_split_paths_path .................. None + 0: tile_factor ..................................... 1 + 0: titles_data_path ................................ None + 0: tokenizer_name_or_path .......................... None + 0: tokenizer_type .................................. GPT2BPETokenizer + 0: train_iters ..................................... None + 0: train_samples ................................... 1 + 0: train_tokens .................................... None + 0: train_weighted_split_names ...................... ['train'] + 0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document']] + 0: train_weighted_split_paths_path ................. None + 0: train_weighted_split_splits ..................... [['0:1']] + 0: train_weighted_split_weights .................... [['1.0']] + 0: universal_checkpoint ............................ False + 0: use_bnb_optimizer ............................... False + 0: use_checkpoint_lr_scheduler ..................... False + 0: use_contiguous_buffers_in_ddp ................... True + 0: use_cpu_initialization .......................... None + 0: use_one_sent_docs ............................... False + 0: use_pin_memory .................................. False + 0: valid_num_workers ............................... 0 + 0: valid_weighted_split_names ...................... ['validation'] + 0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] + 0: valid_weighted_split_paths_path ................. None + 0: valid_weighted_split_splits ..................... [['0:1']] + 0: valid_weighted_split_weights .................... [['1.0']] + 0: virtual_pipeline_model_parallel_size ............ None + 0: vocab_extra_ids ................................. 0 + 0: vocab_file ...................................... gpt2/vocab.json + 0: weight_decay .................................... 0.1 + 0: world_size ...................................... 256 + 0: zero_allgather_bucket_size ...................... 0.0 + 0: zero_contigious_gradients ....................... False + 0: zero_reduce_bucket_size ......................... 0.0 + 0: zero_reduce_scatter ............................. False + 0: zero_stage ...................................... 0 + 0: -------------------- end of arguments --------------------- + 0: setting number of micro-batches to constant 32 + 0: > building GPT2BPETokenizer tokenizer ... + 0: > padded vocab (size: 50257) with 431 dummy tokens (new size: 50688) + 0: DeepSpeed general environment info: + 0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] + 0: torch version .................... 1.13.0+rocm5.2 + 0: torch cuda version ............... None + 0: torch hip version ................ 5.2.21151-afdc89f8 + 0: nvcc version ..................... None + 0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] + 0: deepspeed info ................... 0.7.5, unknown, unknown + 0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +31: > setting tensorboard ... + 0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** + 0: > initializing torch distributed ... + 0: [2023-05-25 13:37:21,763] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl + 0: > initializing tensor model parallel with size 4 + 0: > initializing pipeline model parallel with size 4 + 0: > setting random seeds to 1234 ... + 0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 + 0: > compiling dataset index builder ... + 0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' + 0: make: Nothing to be done for 'default'. + 0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' + 0: >>> done with dataset index builder. Compilation time: 0.096 seconds + 0: > compiling and loading fused kernels ... + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] + 0: Total number of unsupported CUDA function calls: 0 + 0: + 0: + 0: Total number of replaced kernel launches: 67 + 0: ninja: no work to do. + 0: >>> done with compiling and loading fused kernels. Compilation time: 27.189 seconds + 0: time to initialize megatron (seconds): -0.168 + 0: [after megatron is initialized] datetime: 2023-05-25 13:37:51 + 0: building GPT model ... + 0: [2023-05-25 13:37:51,982] [INFO] [utils.py:827:see_memory_usage] Before Building Model + 0: [2023-05-25 13:37:51,984] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB + 0: [2023-05-25 13:37:51,984] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 39.23 GB, percent = 7.8% + 0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None + 0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=0, model=1): 1, ProcessCoord(pipe=0, data=0, model=2): 2, ProcessCoord(pipe=0, data=0, model=3): 3, ProcessCoord(pipe=0, data=1, model=0): 4, ProcessCoord(pipe=0, data=1, model=1): 5, ProcessCoord(pipe=0, data=1, model=2): 6, ProcessCoord(pipe=0, data=1, model=3): 7, ProcessCoord(pipe=0, data=2, model=0): 8, ProcessCoord(pipe=0, data=2, model=1): 9, ProcessCoord(pipe=0, data=2, model=2): 10, ProcessCoord(pipe=0, data=2, model=3): 11, ProcessCoord(pipe=0, data=3, model=0): 12, ProcessCoord(pipe=0, data=3, model=1): 13, ProcessCoord(pipe=0, data=3, model=2): 14, ProcessCoord(pipe=0, data=3, model=3): 15, ProcessCoord(pipe=0, data=4, model=0): 16, ProcessCoord(pipe=0, data=4, model=1): 17, ProcessCoord(pipe=0, data=4, model=2): 18, ProcessCoord(pipe=0, data=4, model=3): 19, ProcessCoord(pipe=0, data=5, model=0): 20, ProcessCoord(pipe=0, data=5, model=1): 21, ProcessCoord(pipe=0, data=5, model=2): 22, ProcessCoord(pipe=0, data=5, + 0: model=3): 23, ProcessCoord(pipe=0, data=6, model=0): 24, ProcessCoord(pipe=0, data=6, model=1): 25, ProcessCoord(pipe=0, data=6, model=2): 26, ProcessCoord(pipe=0, data=6, model=3): 27, ProcessCoord(pipe=0, data=7, model=0): 28, ProcessCoord(pipe=0, data=7, model=1): 29, ProcessCoord(pipe=0, data=7, model=2): 30, ProcessCoord(pipe=0, data=7, model=3): 31, ProcessCoord(pipe=0, data=8, model=0): 32, ProcessCoord(pipe=0, data=8, model=1): 33, ProcessCoord(pipe=0, data=8, model=2): 34, ProcessCoord(pipe=0, data=8, model=3): 35, ProcessCoord(pipe=0, data=9, model=0): 36, ProcessCoord(pipe=0, data=9, model=1): 37, ProcessCoord(pipe=0, data=9, model=2): 38, ProcessCoord(pipe=0, data=9, model=3): 39, ProcessCoord(pipe=0, data=10, model=0): 40, ProcessCoord(pipe=0, data=10, model=1): 41, ProcessCoord(pipe=0, data=10, model=2): 42, ProcessCoord(pipe=0, data=10, model=3): 43, ProcessCoord(pipe=0, data=11, model=0): 44, ProcessCoord(pipe=0, data=11, model=1): 45, ProcessCoord(pipe=0, data=11, model=2): 46, ProcessCoord( + 0: pipe=0, data=11, model=3): 47, ProcessCoord(pipe=0, data=12, model=0): 48, ProcessCoord(pipe=0, data=12, model=1): 49, ProcessCoord(pipe=0, data=12, model=2): 50, ProcessCoord(pipe=0, data=12, model=3): 51, ProcessCoord(pipe=0, data=13, model=0): 52, ProcessCoord(pipe=0, data=13, model=1): 53, ProcessCoord(pipe=0, data=13, model=2): 54, ProcessCoord(pipe=0, data=13, model=3): 55, ProcessCoord(pipe=0, data=14, model=0): 56, ProcessCoord(pipe=0, data=14, model=1): 57, ProcessCoord(pipe=0, data=14, model=2): 58, ProcessCoord(pipe=0, data=14, model=3): 59, ProcessCoord(pipe=0, data=15, model=0): 60, ProcessCoord(pipe=0, data=15, model=1): 61, ProcessCoord(pipe=0, data=15, model=2): 62, ProcessCoord(pipe=0, data=15, model=3): 63, ProcessCoord(pipe=1, data=0, model=0): 64, ProcessCoord(pipe=1, data=0, model=1): 65, ProcessCoord(pipe=1, data=0, model=2): 66, ProcessCoord(pipe=1, data=0, model=3): 67, ProcessCoord(pipe=1, data=1, model=0): 68, ProcessCoord(pipe=1, data=1, model=1): 69, ProcessCoord(pipe=1, data=1, mo + 0: del=2): 70, ProcessCoord(pipe=1, data=1, model=3): 71, ProcessCoord(pipe=1, data=2, model=0): 72, ProcessCoord(pipe=1, data=2, model=1): 73, ProcessCoord(pipe=1, data=2, model=2): 74, ProcessCoord(pipe=1, data=2, model=3): 75, ProcessCoord(pipe=1, data=3, model=0): 76, ProcessCoord(pipe=1, data=3, model=1): 77, ProcessCoord(pipe=1, data=3, model=2): 78, ProcessCoord(pipe=1, data=3, model=3): 79, ProcessCoord(pipe=1, data=4, model=0): 80, ProcessCoord(pipe=1, data=4, model=1): 81, ProcessCoord(pipe=1, data=4, model=2): 82, ProcessCoord(pipe=1, data=4, model=3): 83, ProcessCoord(pipe=1, data=5, model=0): 84, ProcessCoord(pipe=1, data=5, model=1): 85, ProcessCoord(pipe=1, data=5, model=2): 86, ProcessCoord(pipe=1, data=5, model=3): 87, ProcessCoord(pipe=1, data=6, model=0): 88, ProcessCoord(pipe=1, data=6, model=1): 89, ProcessCoord(pipe=1, data=6, model=2): 90, ProcessCoord(pipe=1, data=6, model=3): 91, ProcessCoord(pipe=1, data=7, model=0): 92, ProcessCoord(pipe=1, data=7, model=1): 93, ProcessCoord(pipe=1, da + 0: ta=7, model=2): 94, ProcessCoord(pipe=1, data=7, model=3): 95, ProcessCoord(pipe=1, data=8, model=0): 96, ProcessCoord(pipe=1, data=8, model=1): 97, ProcessCoord(pipe=1, data=8, model=2): 98, ProcessCoord(pipe=1, data=8, model=3): 99, ProcessCoord(pipe=1, data=9, model=0): 100, ProcessCoord(pipe=1, data=9, model=1): 101, ProcessCoord(pipe=1, data=9, model=2): 102, ProcessCoord(pipe=1, data=9, model=3): 103, ProcessCoord(pipe=1, data=10, model=0): 104, ProcessCoord(pipe=1, data=10, model=1): 105, ProcessCoord(pipe=1, data=10, model=2): 106, ProcessCoord(pipe=1, data=10, model=3): 107, ProcessCoord(pipe=1, data=11, model=0): 108, ProcessCoord(pipe=1, data=11, model=1): 109, ProcessCoord(pipe=1, data=11, model=2): 110, ProcessCoord(pipe=1, data=11, model=3): 111, ProcessCoord(pipe=1, data=12, model=0): 112, ProcessCoord(pipe=1, data=12, model=1): 113, ProcessCoord(pipe=1, data=12, model=2): 114, ProcessCoord(pipe=1, data=12, model=3): 115, ProcessCoord(pipe=1, data=13, model=0): 116, ProcessCoord(pipe=1, data=13 + 0: , model=1): 117, ProcessCoord(pipe=1, data=13, model=2): 118, ProcessCoord(pipe=1, data=13, model=3): 119, ProcessCoord(pipe=1, data=14, model=0): 120, ProcessCoord(pipe=1, data=14, model=1): 121, ProcessCoord(pipe=1, data=14, model=2): 122, ProcessCoord(pipe=1, data=14, model=3): 123, ProcessCoord(pipe=1, data=15, model=0): 124, ProcessCoord(pipe=1, data=15, model=1): 125, ProcessCoord(pipe=1, data=15, model=2): 126, ProcessCoord(pipe=1, data=15, model=3): 127, ProcessCoord(pipe=2, data=0, model=0): 128, ProcessCoord(pipe=2, data=0, model=1): 129, ProcessCoord(pipe=2, data=0, model=2): 130, ProcessCoord(pipe=2, data=0, model=3): 131, ProcessCoord(pipe=2, data=1, model=0): 132, ProcessCoord(pipe=2, data=1, model=1): 133, ProcessCoord(pipe=2, data=1, model=2): 134, ProcessCoord(pipe=2, data=1, model=3): 135, ProcessCoord(pipe=2, data=2, model=0): 136, ProcessCoord(pipe=2, data=2, model=1): 137, ProcessCoord(pipe=2, data=2, model=2): 138, ProcessCoord(pipe=2, data=2, model=3): 139, ProcessCoord(pipe=2, data=3, + 0: model=0): 140, ProcessCoord(pipe=2, data=3, model=1): 141, ProcessCoord(pipe=2, data=3, model=2): 142, ProcessCoord(pipe=2, data=3, model=3): 143, ProcessCoord(pipe=2, data=4, model=0): 144, ProcessCoord(pipe=2, data=4, model=1): 145, ProcessCoord(pipe=2, data=4, model=2): 146, ProcessCoord(pipe=2, data=4, model=3): 147, ProcessCoord(pipe=2, data=5, model=0): 148, ProcessCoord(pipe=2, data=5, model=1): 149, ProcessCoord(pipe=2, data=5, model=2): 150, ProcessCoord(pipe=2, data=5, model=3): 151, ProcessCoord(pipe=2, data=6, model=0): 152, ProcessCoord(pipe=2, data=6, model=1): 153, ProcessCoord(pipe=2, data=6, model=2): 154, ProcessCoord(pipe=2, data=6, model=3): 155, ProcessCoord(pipe=2, data=7, model=0): 156, ProcessCoord(pipe=2, data=7, model=1): 157, ProcessCoord(pipe=2, data=7, model=2): 158, ProcessCoord(pipe=2, data=7, model=3): 159, ProcessCoord(pipe=2, data=8, model=0): 160, ProcessCoord(pipe=2, data=8, model=1): 161, ProcessCoord(pipe=2, data=8, model=2): 162, ProcessCoord(pipe=2, data=8, model=3): 16 + 0: 3, ProcessCoord(pipe=2, data=9, model=0): 164, ProcessCoord(pipe=2, data=9, model=1): 165, ProcessCoord(pipe=2, data=9, model=2): 166, ProcessCoord(pipe=2, data=9, model=3): 167, ProcessCoord(pipe=2, data=10, model=0): 168, ProcessCoord(pipe=2, data=10, model=1): 169, ProcessCoord(pipe=2, data=10, model=2): 170, ProcessCoord(pipe=2, data=10, model=3): 171, ProcessCoord(pipe=2, data=11, model=0): 172, ProcessCoord(pipe=2, data=11, model=1): 173, ProcessCoord(pipe=2, data=11, model=2): 174, ProcessCoord(pipe=2, data=11, model=3): 175, ProcessCoord(pipe=2, data=12, model=0): 176, ProcessCoord(pipe=2, data=12, model=1): 177, ProcessCoord(pipe=2, data=12, model=2): 178, ProcessCoord(pipe=2, data=12, model=3): 179, ProcessCoord(pipe=2, data=13, model=0): 180, ProcessCoord(pipe=2, data=13, model=1): 181, ProcessCoord(pipe=2, data=13, model=2): 182, ProcessCoord(pipe=2, data=13, model=3): 183, ProcessCoord(pipe=2, data=14, model=0): 184, ProcessCoord(pipe=2, data=14, model=1): 185, ProcessCoord(pipe=2, data=14, model + 0: =2): 186, ProcessCoord(pipe=2, data=14, model=3): 187, ProcessCoord(pipe=2, data=15, model=0): 188, ProcessCoord(pipe=2, data=15, model=1): 189, ProcessCoord(pipe=2, data=15, model=2): 190, ProcessCoord(pipe=2, data=15, model=3): 191, ProcessCoord(pipe=3, data=0, model=0): 192, ProcessCoord(pipe=3, data=0, model=1): 193, ProcessCoord(pipe=3, data=0, model=2): 194, ProcessCoord(pipe=3, data=0, model=3): 195, ProcessCoord(pipe=3, data=1, model=0): 196, ProcessCoord(pipe=3, data=1, model=1): 197, ProcessCoord(pipe=3, data=1, model=2): 198, ProcessCoord(pipe=3, data=1, model=3): 199, ProcessCoord(pipe=3, data=2, model=0): 200, ProcessCoord(pipe=3, data=2, model=1): 201, ProcessCoord(pipe=3, data=2, model=2): 202, ProcessCoord(pipe=3, data=2, model=3): 203, ProcessCoord(pipe=3, data=3, model=0): 204, ProcessCoord(pipe=3, data=3, model=1): 205, ProcessCoord(pipe=3, data=3, model=2): 206, ProcessCoord(pipe=3, data=3, model=3): 207, ProcessCoord(pipe=3, data=4, model=0): 208, ProcessCoord(pipe=3, data=4, model=1): 20 + 0: 9, ProcessCoord(pipe=3, data=4, model=2): 210, ProcessCoord(pipe=3, data=4, model=3): 211, ProcessCoord(pipe=3, data=5, model=0): 212, ProcessCoord(pipe=3, data=5, model=1): 213, ProcessCoord(pipe=3, data=5, model=2): 214, ProcessCoord(pipe=3, data=5, model=3): 215, ProcessCoord(pipe=3, data=6, model=0): 216, ProcessCoord(pipe=3, data=6, model=1): 217, ProcessCoord(pipe=3, data=6, model=2): 218, ProcessCoord(pipe=3, data=6, model=3): 219, ProcessCoord(pipe=3, data=7, model=0): 220, ProcessCoord(pipe=3, data=7, model=1): 221, ProcessCoord(pipe=3, data=7, model=2): 222, ProcessCoord(pipe=3, data=7, model=3): 223, ProcessCoord(pipe=3, data=8, model=0): 224, ProcessCoord(pipe=3, data=8, model=1): 225, ProcessCoord(pipe=3, data=8, model=2): 226, ProcessCoord(pipe=3, data=8, model=3): 227, ProcessCoord(pipe=3, data=9, model=0): 228, ProcessCoord(pipe=3, data=9, model=1): 229, ProcessCoord(pipe=3, data=9, model=2): 230, ProcessCoord(pipe=3, data=9, model=3): 231, ProcessCoord(pipe=3, data=10, model=0): 232, ProcessC + 0: oord(pipe=3, data=10, model=1): 233, ProcessCoord(pipe=3, data=10, model=2): 234, ProcessCoord(pipe=3, data=10, model=3): 235, ProcessCoord(pipe=3, data=11, model=0): 236, ProcessCoord(pipe=3, data=11, model=1): 237, ProcessCoord(pipe=3, data=11, model=2): 238, ProcessCoord(pipe=3, data=11, model=3): 239, ProcessCoord(pipe=3, data=12, model=0): 240, ProcessCoord(pipe=3, data=12, model=1): 241, ProcessCoord(pipe=3, data=12, model=2): 242, ProcessCoord(pipe=3, data=12, model=3): 243, ProcessCoord(pipe=3, data=13, model=0): 244, ProcessCoord(pipe=3, data=13, model=1): 245, ProcessCoord(pipe=3, data=13, model=2): 246, ProcessCoord(pipe=3, data=13, model=3): 247, ProcessCoord(pipe=3, data=14, model=0): 248, ProcessCoord(pipe=3, data=14, model=1): 249, ProcessCoord(pipe=3, data=14, model=2): 250, ProcessCoord(pipe=3, data=14, model=3): 251, ProcessCoord(pipe=3, data=15, model=0): 252, ProcessCoord(pipe=3, data=15, model=1): 253, ProcessCoord(pipe=3, data=15, model=2): 254, ProcessCoord(pipe=3, data=15, model=3): 25 + 0: 5} + 0: [2023-05-25 13:37:53,694] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer + 0: stage=0 layers=14 + 0: 0: _to_float16 + 0: 1: EmbeddingPipe + 0: 2: + 0: 3: ParallelTransformerLayerPipe + 0: 4: ParallelTransformerLayerPipe + 0: 5: ParallelTransformerLayerPipe + 0: 6: ParallelTransformerLayerPipe + 0: 7: ParallelTransformerLayerPipe + 0: 8: ParallelTransformerLayerPipe + 0: 9: ParallelTransformerLayerPipe + 0: 10: ParallelTransformerLayerPipe + 0: 11: ParallelTransformerLayerPipe + 0: 12: ParallelTransformerLayerPipe + 0: 13: ParallelTransformerLayerPipe + 0: stage=1 layers=11 + 0: 14: ParallelTransformerLayerPipe + 0: 15: ParallelTransformerLayerPipe + 0: 16: ParallelTransformerLayerPipe + 0: 17: ParallelTransformerLayerPipe + 0: 18: ParallelTransformerLayerPipe + 0: 19: ParallelTransformerLayerPipe + 0: 20: ParallelTransformerLayerPipe + 0: 21: ParallelTransformerLayerPipe + 0: 22: ParallelTransformerLayerPipe + 0: 23: ParallelTransformerLayerPipe + 0: 24: ParallelTransformerLayerPipe + 0: stage=2 layers=11 + 0: 25: ParallelTransformerLayerPipe + 0: 26: ParallelTransformerLayerPipe + 0: 27: ParallelTransformerLayerPipe + 0: 28: ParallelTransformerLayerPipe + 0: 29: ParallelTransformerLayerPipe + 0: 30: ParallelTransformerLayerPipe + 0: 31: ParallelTransformerLayerPipe + 0: 32: ParallelTransformerLayerPipe + 0: 33: ParallelTransformerLayerPipe + 0: 34: ParallelTransformerLayerPipe + 0: 35: ParallelTransformerLayerPipe + 0: stage=3 layers=13 + 0: 36: ParallelTransformerLayerPipe + 0: 37: ParallelTransformerLayerPipe + 0: 38: ParallelTransformerLayerPipe + 0: 39: ParallelTransformerLayerPipe + 0: 40: ParallelTransformerLayerPipe + 0: 41: ParallelTransformerLayerPipe + 0: 42: ParallelTransformerLayerPipe + 0: 43: ParallelTransformerLayerPipe + 0: 44: ParallelTransformerLayerPipe + 0: 45: undo + 0: 46: MixedFusedLayerNorm + 0: 47: EmbeddingPipe + 0: 48: float16_to_fp32 + 0: loss: CrossEntropy + 0: [2023-05-25 13:37:55,326] [INFO] [utils.py:827:see_memory_usage] After Building Model + 0: [2023-05-25 13:37:55,327] [INFO] [utils.py:828:see_memory_usage] MA 1.16 GB Max_MA 1.16 GB CA 1.19 GB Max_CA 1 GB + 0: [2023-05-25 13:37:55,327] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 39.98 GB, percent = 7.9% + 0: setting training iterations to 0 + 0: > learning rate decay style: cosine + 0: DeepSpeed is enabled. + 0: [2023-05-25 13:37:55,329] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown + 0: [2023-05-25 13:37:56,124] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False + 0: [2023-05-25 13:37:56,125] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer + 0: [2023-05-25 13:37:56,125] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer + 0: [2023-05-25 13:37:56,128] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam + 0: [2023-05-25 13:37:56,128] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer + 8: ninja: no work to do. + 8: Time to load utils op: 0.30178356170654297 seconds + 0: [2023-05-25 13:37:56,266] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer + 0: [2023-05-25 13:37:56,267] [INFO] [utils.py:828:see_memory_usage] MA 1.15 GB Max_MA 1.18 GB CA 1.21 GB Max_CA 1 GB + 0: [2023-05-25 13:37:56,267] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.57 GB, percent = 8.1% + 4: ninja: no work to do. +27: Time to load utils op: 0.23575830459594727 seconds +25: Time to load utils op: 0.23824620246887207 seconds +29: Time to load utils op: 0.23657011985778809 seconds +31: Time to load utils op: 0.23646783828735352 seconds + 4: Time to load utils op: 0.2690882682800293 seconds +19: Time to load utils op: 0.438152551651001 seconds +27: Time to load utils op: 0.5091795921325684 seconds +27: Time to load utils op: 0.5093820095062256 seconds + 4: Time to load utils op: 0.4035837650299072 seconds + 4: Time to load utils op: 0.40418434143066406 secondsTime to load utils op: 0.4037165641784668 seconds + 4: + 4: Time to load utils op: 0.4037313461303711 seconds + 4: Time to load utils op: 0.20209789276123047 seconds + 4: Time to load utils op: 0.20212411880493164 seconds + 4: Time to load utils op: 0.5114321708679199 seconds + 8: Time to load utils op: 0.7036423683166504 seconds + 8: Time to load utils op: 0.7027533054351807 seconds +27: Time to load utils op: 0.4047553539276123 seconds +31: Time to load utils op: 0.4048454761505127 seconds +27: Time to load utils op: 0.4050273895263672 seconds +27: Time to load utils op: 0.5087499618530273 seconds +25: Time to load utils op: 0.4051194190979004 seconds +25: Time to load utils op: 0.5113706588745117 seconds +29: Time to load utils op: 0.40415000915527344 seconds +31: Time to load utils op: 0.5094578266143799 seconds +19: Time to load utils op: 0.7111871242523193 seconds +31: Time to load utils op: 0.4053914546966553 seconds +25: Time to load utils op: 0.40567612648010254 seconds +25: Time to load utils op: 0.5124614238739014 seconds +29: Time to load utils op: 0.5098941326141357 seconds +25: Time to load utils op: 0.5127496719360352 seconds +29: Time to load utils op: 0.4054267406463623 seconds + 1: Time to load utils op: 0.2576630115509033 seconds + 1: Time to load utils op: 0.45353198051452637 seconds + 1: Time to load utils op: 0.257843017578125 seconds + 1: Time to load utils op: 0.45613670349121094 seconds + 1: Time to load utils op: 0.562053918838501 seconds + 1: Time to load utils op: 0.4560363292694092 secondsTime to load utils op: 0.5620720386505127 seconds + 1: Time to load utils op: 0.45362186431884766 seconds + 1: +12: Time to load utils op: 0.7370617389678955 secondsTime to load utils op: 0.660893440246582 seconds +12: +12: Time to load utils op: 0.7377474308013916 seconds +19: Time to load utils op: 0.10208749771118164 seconds +19: Time to load utils op: 0.10207986831665039 seconds +31: Time to load utils op: 0.6068530082702637 seconds + 0: Time to load utils op: 0.4332597255706787 seconds + 0: Time to load utils op: 0.5711750984191895 seconds + 0: Time to load utils op: 0.45577406883239746 seconds + 0: Time to load utils op: 0.4585137367248535 seconds +31: Time to load utils op: 0.6070947647094727 seconds + 0: Time to load utils op: 0.4612240791320801 seconds +10: Time to load utils op: 0.6634395122528076 seconds +10: Time to load utils op: 0.6613531112670898 seconds +10: Time to load utils op: 0.7375209331512451 secondsTime to load utils op: 0.737938642501831 seconds +10: +21: Time to load utils op: 0.6615009307861328 seconds +21: Time to load utils op: 0.10659575462341309 seconds +21: Time to load utils op: 0.10660076141357422 seconds +21: Time to load utils op: 0.6614029407501221 seconds + 6: Time to load utils op: 0.25034117698669434 seconds + 6: Time to load utils op: 0.45963382720947266 seconds + 6: Time to load utils op: 0.25041985511779785 seconds + 6: Time to load utils op: 0.45969414710998535 seconds + 6: Time to load utils op: 0.4598839282989502 secondsTime to load utils op: 0.4597182273864746 seconds + 6: + 6: Time to load utils op: 0.5618906021118164 seconds + 6: Time to load utils op: 0.5621991157531738 seconds +11: Time to load utils op: 0.7446906566619873 seconds +11: Time to load utils op: 0.7448182106018066 seconds + 2: Time to load utils op: 0.46088242530822754 secondsTime to load utils op: 0.25507473945617676 seconds + 2: Time to load utils op: 0.4611179828643799 seconds + 2: + 2: Time to load utils op: 0.4609088897705078 seconds + 2: Time to load utils op: 0.5657992362976074 secondsTime to load utils op: 0.2545464038848877 seconds + 2: Time to load utils op: 0.5657992362976074 seconds + 2: + 2: Time to load utils op: 0.46126651763916016 seconds + 5: Time to load utils op: 0.25250864028930664 seconds + 5: Time to load utils op: 0.2522706985473633 seconds + 5: Time to load utils op: 0.4619481563568115 seconds + 5: Time to load utils op: 0.4620935916900635 seconds + 5: Time to load utils op: 0.4618828296661377 secondsTime to load utils op: 0.4619159698486328 seconds + 5: + 5: Time to load utils op: 0.561814546585083 seconds + 5: Time to load utils op: 0.561821699142456 seconds +21: Time to load utils op: 0.7082655429840088 seconds + 7: Time to load utils op: 0.2521176338195801 seconds + 7: Time to load utils op: 0.46318626403808594 secondsTime to load utils op: 0.5657186508178711 secondsTime to load utils op: 0.25212836265563965 seconds + 7: + 7: + 7: Time to load utils op: 0.4626500606536865 seconds + 7: Time to load utils op: 0.46317338943481445 secondsTime to load utils op: 0.4628410339355469 seconds + 7: + 7: Time to load utils op: 0.5657434463500977 seconds +14: Time to load utils op: 0.7455112934112549 seconds +14: Time to load utils op: 0.7455756664276123 seconds +21: Time to load utils op: 0.7084395885467529 seconds + 9: Time to load utils op: 0.7485213279724121 seconds + 9: Time to load utils op: 0.7485222816467285 seconds +29: Time to load utils op: 0.6059412956237793 seconds +21: Time to load utils op: 0.6028850078582764 seconds +21: Time to load utils op: 0.603020429611206 seconds +29: Time to load utils op: 0.6060101985931396 seconds +13: Time to load utils op: 0.7481284141540527 secondsTime to load utils op: 0.7480945587158203 seconds +13: +15: Time to load utils op: 0.7479209899902344 seconds +15: Time to load utils op: 0.7480747699737549 seconds + 3: Time to load utils op: 0.26064062118530273 seconds + 3: Time to load utils op: 0.26056909561157227 seconds + 3: Time to load utils op: 0.4670083522796631 seconds + 3: Time to load utils op: 0.5691032409667969 secondsTime to load utils op: 0.569145917892456 seconds + 3: + 3: Time to load utils op: 0.4664738178253174 seconds + 3: Time to load utils op: 0.46637892723083496 seconds + 3: Time to load utils op: 0.46645474433898926 seconds +24: Time to load utils op: 0.5560553073883057 secondsTime to load utils op: 0.5558757781982422 secondsTime to load utils op: 0.4384150505065918 seconds +24: +24: Time to load utils op: 0.43471837043762207 seconds +24: +19: Time to load utils op: 0.6093926429748535 seconds +19: Time to load utils op: 0.6096105575561523 seconds +18: Time to load utils op: 0.12158584594726562 seconds +18: Time to load utils op: 0.12160086631774902 seconds +18: Time to load utils op: 0.6143319606781006 seconds +18: Time to load utils op: 0.7552504539489746 seconds +18: Time to load utils op: 0.6144566535949707 seconds +18: Time to load utils op: 0.7540421485900879 seconds +22: Time to load utils op: 0.12187433242797852 secondsTime to load utils op: 0.12182140350341797 seconds +22: +22: Time to load utils op: 0.7060589790344238 seconds +22: Time to load utils op: 0.6143419742584229 secondsTime to load utils op: 0.6147100925445557 secondsTime to load utils op: 0.7060697078704834 seconds +22: +22: +26: Time to load utils op: 0.44120168685913086 seconds +26: Time to load utils op: 0.5481042861938477 secondsTime to load utils op: 0.5480649471282959 seconds +26: +26: Time to load utils op: 0.589282751083374 seconds +26: Time to load utils op: 0.4412698745727539 seconds +26: Time to load utils op: 0.5893232822418213 seconds + 8: Time to load utils op: 0.7042996883392334 seconds +20: Time to load utils op: 0.1278538703918457 seconds +20: Time to load utils op: 0.1278393268585205 seconds +20: Time to load utils op: 0.620067834854126 seconds +20: Time to load utils op: 0.6201231479644775 seconds +20: Time to load utils op: 0.7358834743499756 seconds +20: Time to load utils op: 0.7359130382537842 seconds +23: Time to load utils op: 0.13059258460998535 secondsTime to load utils op: 0.13062262535095215 seconds +23: +23: Time to load utils op: 0.6215388774871826 seconds +23: Time to load utils op: 0.6220412254333496 seconds +23: Time to load utils op: 0.7129933834075928 seconds + 9: Time to load utils op: 0.7032003402709961 seconds +23: Time to load utils op: 0.7130067348480225 seconds +12: Time to load utils op: 0.7025938034057617 seconds + 9: Time to load utils op: 0.7030534744262695 seconds + 8: Time to load utils op: 0.7033205032348633 seconds +16: Time to load utils op: 0.13652253150939941 secondsTime to load utils op: 0.13122868537902832 seconds +16: +16: Time to load utils op: 0.6476225852966309 secondsTime to load utils op: 0.6473546028137207 seconds +16: +13: Time to load utils op: 0.7033224105834961 seconds +11: Time to load utils op: 0.7037146091461182 seconds +16: Time to load utils op: 0.6476404666900635 seconds +16: Time to load utils op: 0.6138520240783691 secondsTime to load utils op: 0.613917350769043 seconds +16: +16: Time to load utils op: 0.647650957107544 seconds +15: Time to load utils op: 0.7038397789001465 seconds +12: Time to load utils op: 0.7036304473876953 seconds +13: Time to load utils op: 0.7037298679351807 seconds +15: Time to load utils op: 0.7037575244903564 seconds +11: Time to load utils op: 0.70395827293396 seconds +14: Time to load utils op: 0.7038009166717529 seconds +24: Time to load utils op: 0.202284574508667 seconds +14: Time to load utils op: 0.7040200233459473 seconds + 8: Time to load utils op: 0.8194050788879395 seconds +17: Time to load utils op: 0.14082074165344238 seconds +17: Time to load utils op: 0.1407914161682129 seconds +17: Time to load utils op: 0.6432876586914062 secondsTime to load utils op: 0.6432888507843018 seconds +17: +17: Time to load utils op: 0.6433193683624268 secondsTime to load utils op: 0.6123011112213135 secondsTime to load utils op: 0.6431088447570801 seconds +17: +17: +17: Time to load utils op: 0.6123988628387451 seconds + 8: Time to load utils op: 0.0005354881286621094 seconds + 8: Time to load utils op: 0.0005872249603271484 seconds + 8: Time to load utils op: 0.0005140304565429688 seconds + 8: Time to load utils op: 0.0004918575286865234 seconds + 8: Time to load utils op: 0.0008852481842041016 secondsTime to load utils op: 0.0008988380432128906 seconds + 8: +31: Time to load utils op: 0.0005307197570800781 seconds +31: Time to load utils op: 0.0005800724029541016 seconds +31: Time to load utils op: 0.0005927085876464844 seconds +31: Time to load utils op: 0.0006144046783447266 secondsTime to load utils op: 0.0006015300750732422 seconds +31: +31: Time to load utils op: 0.0006074905395507812 seconds + 4: Time to load utils op: 0.0005595684051513672 seconds + 4: Time to load utils op: 0.0006244182586669922 secondsTime to load utils op: 0.0006279945373535156 secondsTime to load utils op: 0.0006160736083984375 seconds + 4: Time to load utils op: 0.0006418228149414062 seconds + 4: + 4: +29: Time to load utils op: 0.00036144256591796875 seconds + 4: Time to load utils op: 0.0006935596466064453 seconds + 4: Time to load utils op: 0.0006651878356933594 seconds + 4: Time to load utils op: 0.0005555152893066406 seconds +29: Time to load utils op: 0.0005433559417724609 seconds +19: Time to load utils op: 0.0005021095275878906 seconds +19: Time to load utils op: 0.0005240440368652344 seconds +27: Time to load utils op: 0.0005354881286621094 seconds +25: Time to load utils op: 0.0005118846893310547 seconds +27: Time to load utils op: 0.0005426406860351562 secondsTime to load utils op: 0.0005259513854980469 seconds +27: +25: Time to load utils op: 0.00041222572326660156 seconds +19: Time to load utils op: 0.0005705356597900391 seconds +29: Time to load utils op: 0.000476837158203125 seconds +27: Time to load utils op: 0.0006022453308105469 seconds +27: Time to load utils op: 0.0005571842193603516 seconds +27: Time to load utils op: 0.0005903244018554688 seconds +29: Time to load utils op: 0.0004334449768066406 seconds +29: Time to load utils op: 0.0004489421844482422 seconds +25: Time to load utils op: 0.0005373954772949219 seconds +25: Time to load utils op: 0.0005571842193603516 seconds +25: Time to load utils op: 0.0005788803100585938 seconds +25: Time to load utils op: 0.000576019287109375 seconds +29: Time to load utils op: 0.0004904270172119141 seconds +19: Time to load utils op: 0.0004885196685791016 seconds +19: Time to load utils op: 0.0004863739013671875 seconds +19: Time to load utils op: 0.0005009174346923828 seconds +30: Time to load utils op: 0.4806244373321533 secondsTime to load utils op: 0.480745792388916 seconds +30: +30: Time to load utils op: 0.5842585563659668 seconds +28: Time to load utils op: 0.5876946449279785 seconds +28: Time to load utils op: 0.6461985111236572 seconds +28: Time to load utils op: 0.5877132415771484 secondsTime to load utils op: 0.6461911201477051 seconds +28: +28: Time to load utils op: 0.4819180965423584 secondsTime to load utils op: 0.48189759254455566 seconds +28: +30: Time to load utils op: 0.6484808921813965 seconds +30: Time to load utils op: 0.5842554569244385 seconds +30: Time to load utils op: 0.6485540866851807 seconds + 0: [2023-05-25 13:37:56,818] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 + 0: [2023-05-25 13:37:56,819] [INFO] [utils.py:828:see_memory_usage] MA 1.15 GB Max_MA 1.15 GB CA 1.21 GB Max_CA 1 GB + 0: [2023-05-25 13:37:56,819] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.57 GB, percent = 8.1% +11: Time to load utils op: 0.0004999637603759766 seconds +11: Time to load utils op: 0.0004982948303222656 seconds +11: Time to load utils op: 0.0005078315734863281 seconds +11: Time to load utils op: 0.00044846534729003906 seconds +26: Time to load utils op: 0.0005154609680175781 seconds +26: Time to load utils op: 0.0005376338958740234 secondsTime to load utils op: 0.0005030632019042969 seconds +26: +26: Time to load utils op: 0.0008871555328369141 seconds +26: Time to load utils op: 0.0008716583251953125 seconds +26: Time to load utils op: 0.0009043216705322266 seconds + 0: Time to load utils op: 0.0005297660827636719 seconds + 0: Time to load utils op: 0.0005872249603271484 seconds + 0: Time to load utils op: 0.0005693435668945312 seconds +14: Time to load utils op: 0.00042724609375 seconds +14: Time to load utils op: 0.00031876564025878906 seconds +14: Time to load utils op: 0.0003883838653564453 seconds +22: Time to load utils op: 0.0005276203155517578 seconds +14: Time to load utils op: 0.0005440711975097656 seconds +22: Time to load utils op: 0.0005440711975097656 seconds +22: Time to load utils op: 0.0005826950073242188 seconds +22: Time to load utils op: 0.0005676746368408203 seconds +22: Time to load utils op: 0.0005846023559570312 seconds + 0: Time to load utils op: 0.0005750656127929688 seconds +22: Time to load utils op: 0.0006031990051269531 seconds + 7: Time to load utils op: 0.000568389892578125 seconds + 7: Time to load utils op: 0.0005848407745361328 seconds + 7: Time to load utils op: 0.00041174888610839844 secondsTime to load utils op: 0.00041103363037109375 seconds + 7: + 7: Time to load utils op: 0.0006411075592041016 secondsTime to load utils op: 0.0006239414215087891 secondsTime to load utils op: 0.0006394386291503906 seconds + 7: + 7: + 7: Time to load utils op: 0.0006632804870605469 seconds + 1: Time to load utils op: 0.0005211830139160156 seconds + 1: Time to load utils op: 0.0005395412445068359 secondsTime to load utils op: 0.0005125999450683594 seconds + 1: + 1: Time to load utils op: 0.0005517005920410156 seconds + 1: Time to load utils op: 0.0005724430084228516 seconds + 1: Time to load utils op: 0.0006003379821777344 seconds + 1: Time to load utils op: 0.0006380081176757812 seconds + 1: Time to load utils op: 0.0006444454193115234 seconds + 5: Time to load utils op: 0.0004296302795410156 seconds + 5: Time to load utils op: 0.00043320655822753906 seconds + 5: Time to load utils op: 0.0004394054412841797 seconds + 5: Time to load utils op: 0.0005700588226318359 seconds + 5: Time to load utils op: 0.0004134178161621094 seconds + 5: Time to load utils op: 0.0006062984466552734 secondsTime to load utils op: 0.0005769729614257812 seconds + 5: + 5: Time to load utils op: 0.0006070137023925781 seconds +15: Time to load utils op: 0.00045561790466308594 seconds +15: Time to load utils op: 0.0005035400390625 seconds +16: Time to load utils op: 0.0005047321319580078 seconds +15: Time to load utils op: 0.0006430149078369141 seconds +16: Time to load utils op: 0.0004985332489013672 seconds +15: Time to load utils op: 0.0005288124084472656 seconds +16: Time to load utils op: 0.0004291534423828125 seconds +16: Time to load utils op: 0.0004525184631347656 secondsTime to load utils op: 0.00044345855712890625 secondsTime to load utils op: 0.0004315376281738281 seconds +16: +16: +16: Time to load utils op: 0.0005676746368408203 seconds +16: Time to load utils op: 0.0005621910095214844 seconds +18: Time to load utils op: 0.0005075931549072266 secondsTime to load utils op: 0.0004265308380126953 seconds +18: Time to load utils op: 0.0004279613494873047 seconds +18: +18: Time to load utils op: 0.0005202293395996094 seconds +18: Time to load utils op: 0.0005521774291992188 seconds +18: Time to load utils op: 0.00040721893310546875 seconds +17: Time to load utils op: 0.0009181499481201172 seconds +17: Time to load utils op: 0.0012173652648925781 seconds +17: Time to load utils op: 0.0011553764343261719 seconds +17: Time to load utils op: 0.0011568069458007812 seconds +17: Time to load utils op: 0.00116729736328125 seconds +17: Time to load utils op: 0.0011680126190185547 seconds +17: Time to load utils op: 0.0011627674102783203 seconds +17: Time to load utils op: 0.0012166500091552734 seconds +28: Time to load utils op: 0.0005033016204833984 seconds +28: Time to load utils op: 0.00052642822265625 seconds +28: Time to load utils op: 0.0004949569702148438 seconds +28: Time to load utils op: 0.0005240440368652344 seconds +28: Time to load utils op: 0.0005254745483398438 seconds +28: Time to load utils op: 0.0005373954772949219 seconds +21: Time to load utils op: 0.0005102157592773438 seconds +21: Time to load utils op: 0.0005254745483398438 seconds +21: Time to load utils op: 0.0005786418914794922 seconds +21: Time to load utils op: 0.0005681514739990234 seconds +21: Time to load utils op: 0.0006284713745117188 seconds +21: Time to load utils op: 0.0006606578826904297 secondsTime to load utils op: 0.0006785392761230469 seconds +21: Time to load utils op: 0.0006337165832519531 seconds +21: +12: Time to load utils op: 0.0005335807800292969 seconds +12: Time to load utils op: 0.0005369186401367188 seconds +12: Time to load utils op: 0.00037288665771484375 seconds +12: Time to load utils op: 0.0009567737579345703 seconds +12: Time to load utils op: 0.001031637191772461 seconds +13: Time to load utils op: 0.0004525184631347656 seconds + 9: Time to load utils op: 0.0004379749298095703 seconds +13: Time to load utils op: 0.0004355907440185547 seconds +13: Time to load utils op: 0.0004734992980957031 seconds +13: Time to load utils op: 0.0005164146423339844 seconds + 9: Time to load utils op: 0.0004668235778808594 seconds + 9: Time to load utils op: 0.00047016143798828125 seconds + 9: Time to load utils op: 0.0004749298095703125 seconds +20: Time to load utils op: 0.0004754066467285156 seconds +20: Time to load utils op: 0.0005810260772705078 seconds +20: Time to load utils op: 0.0005903244018554688 seconds +30: Time to load utils op: 0.0005154609680175781 seconds +30: Time to load utils op: 0.0005581378936767578 seconds +20: Time to load utils op: 0.0008459091186523438 seconds +30: Time to load utils op: 0.0004153251647949219 seconds +20: Time to load utils op: 0.0008363723754882812 seconds +20: Time to load utils op: 0.0008089542388916016 seconds +30: Time to load utils op: 0.0004482269287109375 seconds +30: Time to load utils op: 0.00048279762268066406 seconds +30: Time to load utils op: 0.0005724430084228516 seconds +10: Time to load utils op: 0.00045037269592285156 seconds +10: Time to load utils op: 0.0003428459167480469 seconds +10: Time to load utils op: 0.0004811286926269531 seconds +10: Time to load utils op: 0.0005216598510742188 seconds +23: Time to load utils op: 0.000377655029296875 seconds +23: Time to load utils op: 0.00048351287841796875 seconds +23: Time to load utils op: 0.0004029273986816406 seconds +23: Time to load utils op: 0.00048804283142089844 seconds +23: Time to load utils op: 0.0004127025604248047 seconds + 3: Time to load utils op: 0.0005886554718017578 seconds + 3: Time to load utils op: 0.0005872249603271484 seconds + 3: Time to load utils op: 0.0005955696105957031 seconds +23: Time to load utils op: 0.0007665157318115234 seconds + 3: Time to load utils op: 0.0005393028259277344 seconds + 3: Time to load utils op: 0.0005140304565429688 seconds + 3: Time to load utils op: 0.0004215240478515625 seconds + 3: Time to load utils op: 0.00042366981506347656 seconds + 3: Time to load utils op: 0.00044608116149902344 seconds +24: Time to load utils op: 0.0005261898040771484 seconds +24: Time to load utils op: 0.0005981922149658203 seconds +24: Time to load utils op: 0.0006041526794433594 seconds +24: Time to load utils op: 0.0006668567657470703 seconds +24: Time to load utils op: 0.0007162094116210938 seconds + 2: Time to load utils op: 0.00041174888610839844 seconds + 2: Time to load utils op: 0.00041556358337402344 seconds + 2: Time to load utils op: 0.0005459785461425781 seconds + 2: Time to load utils op: 0.0004086494445800781 seconds + 6: Time to load utils op: 0.0005621910095214844 seconds + 2: Time to load utils op: 0.0005383491516113281 seconds + 6: Time to load utils op: 0.0005805492401123047 secondsTime to load utils op: 0.000553131103515625 seconds + 6: + 6: Time to load utils op: 0.0006177425384521484 seconds + 6: Time to load utils op: 0.0006079673767089844 seconds + 6: Time to load utils op: 0.0005972385406494141 seconds + 2: Time to load utils op: 0.0005815029144287109 seconds + 6: Time to load utils op: 0.0005533695220947266 seconds + 6: Time to load utils op: 0.0006730556488037109 seconds + 2: Time to load utils op: 0.0006244182586669922 secondsTime to load utils op: 0.0006330013275146484 seconds + 2: + 0: [2023-05-25 13:37:57,005] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 + 0: [2023-05-25 13:37:57,006] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.14 GB Max_CA 3 GB + 0: [2023-05-25 13:37:57,006] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.58 GB, percent = 8.1% + 0: [2023-05-25 13:37:57,112] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 + 0: [2023-05-25 13:37:57,112] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.14 GB Max_CA 3 GB + 0: [2023-05-25 13:37:57,113] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.57 GB, percent = 8.1% + 0: [2023-05-25 13:37:57,219] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 + 0: [2023-05-25 13:37:57,220] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:57,220] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.62 GB, percent = 8.1% + 0: [2023-05-25 13:37:57,324] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 + 0: [2023-05-25 13:37:57,325] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:57,325] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.61 GB, percent = 8.1% + 0: [2023-05-25 13:37:57,431] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 + 0: [2023-05-25 13:37:57,431] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:57,432] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.59 GB, percent = 8.1% +24: Time to load utils op: 1.3585577011108398 seconds +24: Time to load utils op: 0.9049663543701172 seconds +24: Time to load utils op: 1.3587603569030762 seconds + 9: Time to load utils op: 1.5204792022705078 secondsTime to load utils op: 1.5204503536224365 seconds + 9: +30: Time to load utils op: 0.9090657234191895 seconds +13: Time to load utils op: 1.5154027938842773 seconds +12: Time to load utils op: 1.519517421722412 seconds +27: Time to load utils op: 0.9128096103668213 seconds +10: Time to load utils op: 1.4154348373413086 seconds +31: Time to load utils op: 0.9112515449523926 seconds + 8: Time to load utils op: 1.4076459407806396 seconds +14: Time to load utils op: 1.5218493938446045 seconds +11: Time to load utils op: 1.5232417583465576 seconds +15: Time to load utils op: 1.5217669010162354 seconds +26: Time to load utils op: 0.9139127731323242 seconds +25: Time to load utils op: 0.9124491214752197 seconds +30: Time to load utils op: 0.9135839939117432 seconds +28: Time to load utils op: 0.9142982959747314 seconds +20: Time to load utils op: 1.4124619960784912 seconds +29: Time to load utils op: 0.9148545265197754 seconds +20: Time to load utils op: 1.4125392436981201 seconds +18: Time to load utils op: 1.416581630706787 seconds + 9: Time to load utils op: 1.4094302654266357 seconds + 0: Time to load utils op: 1.2117903232574463 seconds +19: Time to load utils op: 1.412179946899414 seconds +13: Time to load utils op: 1.522350549697876 seconds +12: Time to load utils op: 1.409986972808838 seconds +15: Time to load utils op: 1.411417007446289 seconds +27: Time to load utils op: 0.9187188148498535 seconds +23: Time to load utils op: 1.4150562286376953 seconds + 9: Time to load utils op: 1.4122142791748047 seconds +31: Time to load utils op: 0.9175219535827637 seconds +11: Time to load utils op: 1.4125797748565674 seconds +14: Time to load utils op: 1.4122824668884277 seconds +26: Time to load utils op: 0.9198484420776367 seconds +25: Time to load utils op: 0.9182353019714355 seconds +28: Time to load utils op: 0.9202065467834473 seconds + 0: Time to load utils op: 1.012251615524292 seconds +29: Time to load utils op: 0.9204561710357666 seconds +18: Time to load utils op: 1.42279052734375 seconds +22: Time to load utils op: 1.417161226272583 seconds + 8: Time to load utils op: 1.4147624969482422 seconds +15: Time to load utils op: 1.4162187576293945 seconds +13: Time to load utils op: 1.416511058807373 seconds +19: Time to load utils op: 1.4203555583953857 seconds +10: Time to load utils op: 1.426513433456421 seconds +12: Time to load utils op: 1.532006025314331 seconds +23: Time to load utils op: 1.4213123321533203 seconds +24: Time to load utils op: 0.0005617141723632812 seconds +24: Time to load utils op: 0.00038051605224609375 seconds +24: Time to load utils op: 0.00035643577575683594 seconds +11: Time to load utils op: 1.535393476486206 seconds +14: Time to load utils op: 1.5340511798858643 seconds + 0: Time to load utils op: 1.0168066024780273 seconds +22: Time to load utils op: 1.4235260486602783 seconds +15: Time to load utils op: 1.53786039352417 seconds +13: Time to load utils op: 1.422839879989624 seconds +11: Time to load utils op: 1.4239988327026367 seconds +14: Time to load utils op: 1.4244880676269531 seconds +10: Time to load utils op: 1.5378572940826416 seconds +10: Time to load utils op: 1.5438995361328125 seconds + 8: Time to load utils op: 0.0005128383636474609 seconds + 8: Time to load utils op: 0.0003561973571777344 seconds + 9: Time to load utils op: 0.005045413970947266 seconds + 9: Time to load utils op: 0.0003998279571533203 secondsTime to load utils op: 0.00043892860412597656 seconds + 9: + 9: Time to load utils op: 0.0003948211669921875 seconds + 0: [2023-05-25 13:37:57,546] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer + 0: [2023-05-25 13:37:57,547] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:57,547] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.64 GB, percent = 8.1% +30: Time to load utils op: 0.0031511783599853516 seconds +30: Time to load utils op: 0.0024242401123046875 seconds +10: Time to load utils op: 0.006439208984375 seconds +10: Time to load utils op: 0.0065593719482421875 seconds +10: Time to load utils op: 0.006856441497802734 secondsTime to load utils op: 0.0058116912841796875 seconds +10: +15: Time to load utils op: 0.005354642868041992 seconds +15: Time to load utils op: 0.005326509475708008 seconds +15: Time to load utils op: 0.005168437957763672 seconds +15: Time to load utils op: 0.004804134368896484 seconds +20: Time to load utils op: 0.004908323287963867 seconds +20: Time to load utils op: 0.0048220157623291016 seconds +13: Time to load utils op: 0.005181074142456055 seconds +13: Time to load utils op: 0.004738569259643555 seconds +13: Time to load utils op: 0.005239725112915039 seconds +13: Time to load utils op: 0.005003452301025391 seconds + 0: Time to load utils op: 0.005186796188354492 secondsTime to load utils op: 0.0054874420166015625 seconds + 0: + 0: Time to load utils op: 0.005579471588134766 seconds + 0: [2023-05-25 13:37:57,812] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer + 0: [2023-05-25 13:37:57,813] [INFO] [utils.py:828:see_memory_usage] MA 3.87 GB Max_MA 3.87 GB CA 5.04 GB Max_CA 5 GB + 0: [2023-05-25 13:37:57,813] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.59 GB, percent = 8.1% +23: Time to load utils op: 0.003995418548583984 seconds +12: Time to load utils op: 0.00435638427734375 seconds +12: Time to load utils op: 0.00035381317138671875 seconds +27: Time to load utils op: 0.004480123519897461 seconds +23: Time to load utils op: 0.00046753883361816406 seconds +27: Time to load utils op: 0.00036025047302246094 seconds +31: Time to load utils op: 0.004678249359130859 seconds +31: Time to load utils op: 0.00401616096496582 seconds +28: Time to load utils op: 0.004118442535400391 seconds +28: Time to load utils op: 0.00035262107849121094 seconds +19: Time to load utils op: 0.003877878189086914 seconds +11: Time to load utils op: 0.005124330520629883 seconds +19: Time to load utils op: 0.00040912628173828125 seconds +25: Time to load utils op: 0.0038955211639404297 secondsTime to load utils op: 0.003854036331176758 seconds +25: +11: Time to load utils op: 0.0004703998565673828 seconds +12: Time to load utils op: 0.0004875659942626953 seconds +18: Time to load utils op: 0.0043582916259765625 seconds +18: Time to load utils op: 0.0036308765411376953 seconds +29: Time to load utils op: 0.004399299621582031 seconds +14: Time to load utils op: 0.003698587417602539 seconds +29: Time to load utils op: 0.00035262107849121094 seconds +26: Time to load utils op: 0.0039052963256835938 seconds +26: Time to load utils op: 0.003925800323486328 seconds +14: Time to load utils op: 0.0004837512969970703 seconds +11: Time to load utils op: 0.0004680156707763672 seconds +22: Time to load utils op: 0.0045375823974609375 seconds +14: Time to load utils op: 0.00046563148498535156 seconds +11: Time to load utils op: 0.0003523826599121094 seconds +22: Time to load utils op: 0.00047659873962402344 seconds +14: Time to load utils op: 0.0004787445068359375 seconds + 0: [2023-05-25 13:37:57,926] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer + 0: [2023-05-25 13:37:57,927] [INFO] [utils.py:828:see_memory_usage] MA 3.87 GB Max_MA 3.87 GB CA 5.04 GB Max_CA 5 GB + 0: [2023-05-25 13:37:57,927] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.63 GB, percent = 8.1% + 0: [2023-05-25 13:37:57,927] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam + 0: [2023-05-25 13:37:57,927] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler + 0: [2023-05-25 13:37:57,927] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = + 0: [2023-05-25 13:37:57,927] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] activation_checkpointing_config { + 0: "partition_activations": false, + 0: "contiguous_memory_optimization": false, + 0: "cpu_checkpointing": false, + 0: "number_checkpoints": null, + 0: "synchronize_checkpoint_boundary": false, + 0: "profile": false + 0: } + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] amp_enabled .................. False + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] amp_params ................... False + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] autotuning_config ............ { + 0: "enabled": false, + 0: "start_step": null, + 0: "end_step": null, + 0: "metric_path": null, + 0: "arg_mappings": null, + 0: "metric": "throughput", + 0: "model_info": null, + 0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", + 0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", + 0: "overwrite": true, + 0: "fast": true, + 0: "start_profile_step": 3, + 0: "end_profile_step": 5, + 0: "tuner_type": "gridsearch", + 0: "tuner_early_stopping": 5, + 0: "tuner_num_trials": 50, + 0: "model_info_path": null, + 0: "mp_size": 1, + 0: "max_train_batch_size": null, + 0: "min_train_batch_size": 1, + 0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, + 0: "min_train_micro_batch_size_per_gpu": 1, + 0: "num_tuning_micro_batch_sizes": 3 + 0: } + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] bfloat16_enabled ............. True + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] comms_config ................. + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] communication_data_type ...... None + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa + 0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} + 0: [2023-05-25 13:37:57,928] [INFO] [config.py:1011:print] curriculum_enabled ........... False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] curriculum_params ............ False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] dataloader_drop_last ......... False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] disable_allgather ............ False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] dump_state ................... False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ bert.encoder.layer + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] elasticity_enabled ........... False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] flops_profiler_config ........ { + 0: "enabled": false, + 0: "profile_step": 1, + 0: "module_depth": -1, + 0: "top_modules": 1, + 0: "detailed": true, + 0: "output_file": null + 0: } + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] fp16_auto_cast ............... None + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] fp16_enabled ................. False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] global_rank .................. 0 + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 32 + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] load_universal_checkpoint .... False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] loss_scale ................... 1.0 + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] memory_breakdown ............. False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] monitor_config ............... + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] nebula_config ................ { + 0: "enabled": false, + 0: "persistent_storage_path": null, + 0: "persistent_time_interval": 100, + 0: "num_of_version_in_retention": 2, + 0: "enable_nebula_load": true, + 0: "load_path": null + 0: } + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] optimizer_name ............... None + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] optimizer_params ............. None + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] pld_enabled .................. False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] pld_params ................... False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] prescale_gradients ........... False + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] scheduler_name ............... None + 0: [2023-05-25 13:37:57,929] [INFO] [config.py:1011:print] scheduler_params ............. None + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] sparse_attention ............. None + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] steps_per_print .............. 2000 + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] train_batch_size ............. 512 + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 1 + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] use_node_local_storage ....... False + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] wall_clock_breakdown ......... False + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] world_size ................... 16 + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] zero_enabled ................. False + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:1011:print] zero_optimization_stage ...... 0 + 0: [2023-05-25 13:37:57,930] [INFO] [config.py:996:print_user_config] json = { + 0: "train_micro_batch_size_per_gpu": 1, + 0: "train_batch_size": 512, + 0: "gradient_clipping": 1.0, + 0: "zero_optimization": { + 0: "stage": 0 + 0: }, + 0: "bf16": { + 0: "enabled": true + 0: }, + 0: "steps_per_print": 2.000000e+03, + 0: "wall_clock_breakdown": false + 0: } + 0: Time to load utils op: 0.0004336833953857422 seconds + 0: [2023-05-25 13:37:57,930] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=32 micro_batch_size=1 + 0: [2023-05-25 13:37:58,360] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:37:58,360] [INFO] [engine.py:145:__init__] RANK=1 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:37:58,360] [INFO] [engine.py:145:__init__] RANK=2 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:37:58,360] [INFO] [engine.py:145:__init__] RANK=3 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=131 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=129 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=195 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=192 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=193 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=194 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=130 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=128 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=65 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=64 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=66 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,361] [INFO] [engine.py:145:__init__] RANK=67 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:37:59,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:37:59,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:37:59,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:37:59,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:37:59,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:37:59,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:37:59,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:37:59,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:37:59,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:37:59,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:37:59,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:37:59,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:37:59,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:37:59,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:37:59,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:37:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:37:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:37:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:37:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:37:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:37:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:37:59,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:37:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:37:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:37:59,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 0: [2023-05-25 13:37:59,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 0: [2023-05-25 13:37:59,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:37:59,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... + 0: [2023-05-25 13:37:59,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... + 0: [2023-05-25 13:37:59,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... + 0: [2023-05-25 13:37:59,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... + 0: [2023-05-25 13:37:59,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:37:59,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:37:59,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +24: [2023-05-25 13:37:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... + 5: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +18: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... + 5: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:37:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... + 5: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 5: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... + 2: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... + 5: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... + 4: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +31: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... + 3: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... + 3: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... +17: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... + 8: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... + 7: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... + 3: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... +10: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... + 3: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... + 9: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 4: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... + 6: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... + 2: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... + 9: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 2: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... +23: [2023-05-25 13:37:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... + 3: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 3: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... + 2: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... + 2: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 2: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... + 4: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... + 4: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... + 2: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... + 4: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... +17: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +17: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 4: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... +17: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +17: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +17: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... + 5: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +10: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... +17: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +23: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... + 9: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... +10: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 5: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +10: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... +10: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... +10: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... +23: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +23: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +27: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... +16: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... +10: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... +10: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... + 9: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt... + 7: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +23: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +27: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... +19: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... + 9: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... + 9: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... + 6: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... + 9: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... + 9: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... +16: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... +13: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +27: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt... +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... +14: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... +28: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... + 1: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt... +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... +13: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +21: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +30: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +26: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +22: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... +12: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... +28: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:37:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +29: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... + 4: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +27: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +19: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +13: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... +27: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +27: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +27: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... + 7: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +22: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +13: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... +27: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +25: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +16: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +16: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +16: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... + 2: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt... +19: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +19: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +13: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... +25: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +16: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... + 7: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +19: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +19: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +19: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +13: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt... +13: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt... +25: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +25: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +25: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +21: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +16: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +19: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +13: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... +16: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +25: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +25: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +25: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +21: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +21: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +21: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +21: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... + 1: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... +20: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +16: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +28: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +28: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +21: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +21: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +16: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +20: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +21: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +20: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +20: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +20: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +20: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +28: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +28: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +29: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +28: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +28: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +28: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +28: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +22: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +29: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +30: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +30: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +29: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +20: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +20: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +29: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +22: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +30: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +29: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +22: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt... +30: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +29: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt... +22: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt... +30: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +30: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt... +30: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +30: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt... +29: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... +29: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt... + 3: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +22: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +22: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt... +22: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... +22: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt... + 3: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +12: [2023-05-25 13:37:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:37:59,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +12: [2023-05-25 13:37:59,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt... + 6: [2023-05-25 13:37:59,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +23: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +18: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +17: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +18: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +20: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +19: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +23: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +17: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +16: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +20: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +19: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +22: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +21: [2023-05-25 13:37:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +16: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +21: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +23: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +17: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +17: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +20: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_09_model_states.pt. +16: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +16: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +20: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:37:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:37:59,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +11: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. + 9: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +10: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +15: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +10: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +13: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +15: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. + 8: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +14: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. + 8: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +13: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +14: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +11: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +11: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. + 9: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +13: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:37:59,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_06_model_states.pt. +12: [2023-05-25 13:37:59,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +24: [2023-05-25 13:37:59,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +12: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +25: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +31: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +24: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +27: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +26: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +28: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +31: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +25: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +29: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +26: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +28: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +29: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +24: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +25: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:37:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +27: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +11: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +30: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_13_model_states.pt. +29: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +11: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +14: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +27: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +13: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +15: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. + 9: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +14: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +10: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +15: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. + 8: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +13: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +12: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. + 9: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +11: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:37:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +11: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +14: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +30: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +14: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +13: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +30: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +23: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +12: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +17: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +23: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +21: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +20: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +18: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +19: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +17: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +12: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +24: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +16: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +20: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +18: [2023-05-25 13:37:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +24: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +16: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +22: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +21: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +23: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_11_model_states.pt. +23: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +17: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +20: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +25: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +28: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +22: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +17: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +29: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +31: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +26: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +27: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +28: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +16: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +24: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +29: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +31: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +20: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +27: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +24: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +16: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_05_model_states.pt. +22: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +26: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +21: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +19: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +28: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:37:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +12: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +27: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +11: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +25: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +15: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +29: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +27: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +14: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +26: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +10: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +11: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. + 9: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +10: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +30: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_12_model_states.pt. +13: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +14: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +11: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +12: [2023-05-25 13:37:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +14: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +30: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. +30: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +15: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. + 8: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +12: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. + 5: [2023-05-25 13:37:59,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. + 0: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. +16: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. + 8: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. +17: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. +23: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. + 0: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. +20: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. +22: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. + 6: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. + 7: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. + 3: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. + 1: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. +17: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. +16: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. + 4: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. +21: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. + 7: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. +23: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. + 2: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. +15: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +19: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. +20: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. + 8: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. + 3: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. + 6: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. +21: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. + 5: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +19: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. + 4: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. +16: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. + 8: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +17: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +20: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +22: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +16: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_01_model_states.pt. +17: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_10_model_states.pt. +23: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +21: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +20: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +19: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +21: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +12: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_07_model_states.pt. + 5: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +18: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 0: [2023-05-25 13:37:59,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 5: [2023-05-25 13:37:59,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 5: [2023-05-25 13:37:59,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 0: [2023-05-25 13:37:59,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. +12: [2023-05-25 13:37:59,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 5: [2023-05-25 13:37:59,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 7: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 4: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 6: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 6: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 4: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 2: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 7: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. + 4: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +24: [2023-05-25 13:37:59,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. +25: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. +24: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. +25: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. + 6: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +28: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. +31: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. + 2: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +27: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. + 2: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +18: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. +26: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. +31: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. +28: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. +24: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. +26: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. +27: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. +23: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. + 1: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. +24: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. +25: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +23: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. +30: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. + 0: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +28: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. +27: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +18: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +26: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_03_model_states.pt. +26: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 5: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +27: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +23: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +30: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_14_model_states.pt. + 5: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +17: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. +29: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +23: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +17: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. + 3: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +19: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. +20: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. + 3: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +16: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. + 5: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +30: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +19: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. + 5: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +17: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. + 6: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +20: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. +16: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. +17: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +20: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. +19: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. + 7: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +22: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. +19: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +16: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +30: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +21: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_08_model_states.pt. +18: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +20: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +16: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +11: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. + 6: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +22: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. + 1: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +22: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. + 4: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +11: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. +10: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. + 9: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. + 3: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. +15: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. +21: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. + 9: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. +10: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. +15: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. +14: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. + 2: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_02_model_states.pt. + 4: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +11: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. + 8: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. +14: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. +12: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. + 2: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +13: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. + 8: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. +11: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +10: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +15: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +15: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_04_model_states.pt. +14: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +13: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +24: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +14: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +24: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +27: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +31: [2023-05-25 13:37:59,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +27: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +31: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +26: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +12: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +24: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +25: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +29: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +24: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +25: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +29: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +26: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +27: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +27: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/mp_rank_15_model_states.pt. +29: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:37:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:37:59,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:37:59,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:37:59,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:37:59,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:37:59,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:37:59,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:37:59,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:37:59,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:37:59,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:37:59,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:37:59,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:37:59,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:37:59,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:37:59,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 4: [2023-05-25 13:37:59,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 4: [2023-05-25 13:37:59,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 4: [2023-05-25 13:37:59,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 4: [2023-05-25 13:37:59,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 4: [2023-05-25 13:37:59,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 4: [2023-05-25 13:37:59,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... +18: [2023-05-25 13:37:59,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:37:59,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:37:59,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:37:59,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:37:59,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:37:59,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:37:59,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:37:59,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... +18: [2023-05-25 13:37:59,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +18: [2023-05-25 13:37:59,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:37:59,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +18: [2023-05-25 13:37:59,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:37:59,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... + 1: [2023-05-25 13:37:59,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... +18: [2023-05-25 13:37:59,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +18: [2023-05-25 13:37:59,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +18: [2023-05-25 13:37:59,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... + 1: [2023-05-25 13:37:59,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 1: [2023-05-25 13:37:59,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 1: [2023-05-25 13:37:59,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 1: [2023-05-25 13:37:59,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:37:59,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 7: [2023-05-25 13:37:59,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:37:59,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:37:59,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:37:59,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:37:59,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:37:59,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 5: [2023-05-25 13:37:59,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 5: [2023-05-25 13:37:59,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 5: [2023-05-25 13:37:59,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 5: [2023-05-25 13:37:59,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 5: [2023-05-25 13:37:59,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 3: [2023-05-25 13:37:59,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:37:59,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:37:59,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:37:59,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:37:59,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:37:59,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:37:59,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:37:59,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:37:59,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:37:59,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:37:59,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:37:59,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 0: [2023-05-25 13:37:59,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:37:59,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:37:59,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:37:59,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:37:59,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:37:59,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 0: [2023-05-25 13:37:59,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:37:59,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 7: [2023-05-25 13:37:59,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 7: [2023-05-25 13:37:59,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 7: [2023-05-25 13:37:59,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 0: [2023-05-25 13:37:59,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... +17: [2023-05-25 13:37:59,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:37:59,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:37:59,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:37:59,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:37:59,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:37:59,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:37:59,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. + 2: [2023-05-25 13:37:59,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 4: [2023-05-25 13:37:59,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. +17: [2023-05-25 13:37:59,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 0: [2023-05-25 13:37:59,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 2: [2023-05-25 13:37:59,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 2: [2023-05-25 13:37:59,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 2: [2023-05-25 13:37:59,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 2: [2023-05-25 13:37:59,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... +17: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +17: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +17: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +17: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +10: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 4: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. +10: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 0: [2023-05-25 13:37:59,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... +17: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... + 0: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... +10: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 3: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 0: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 0: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... +17: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +17: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 3: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 3: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... +17: [2023-05-25 13:37:59,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +10: [2023-05-25 13:37:59,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:37:59,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +10: [2023-05-25 13:37:59,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +10: [2023-05-25 13:37:59,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:37:59,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +10: [2023-05-25 13:37:59,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +10: [2023-05-25 13:37:59,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +14: [2023-05-25 13:37:59,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:37:59,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:37:59,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +14: [2023-05-25 13:37:59,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:37:59,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:37:59,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:37:59,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:37:59,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:37:59,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +24: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +23: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +24: [2023-05-25 13:37:59,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +23: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:37:59,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:37:59,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +14: [2023-05-25 13:37:59,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:37:59,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +14: [2023-05-25 13:37:59,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +14: [2023-05-25 13:37:59,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +14: [2023-05-25 13:37:59,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +14: [2023-05-25 13:37:59,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +14: [2023-05-25 13:37:59,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +24: [2023-05-25 13:37:59,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +23: [2023-05-25 13:37:59,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:37:59,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +23: [2023-05-25 13:37:59,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +14: [2023-05-25 13:37:59,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +23: [2023-05-25 13:37:59,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +24: [2023-05-25 13:37:59,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +24: [2023-05-25 13:37:59,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +23: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +23: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +24: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +23: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +23: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +24: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +24: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +24: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +22: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:37:59,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:37:59,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:37:59,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:37:59,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:37:59,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:37:59,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:37:59,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:37:59,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:37:59,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:37:59,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:37:59,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:37:59,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:37:59,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:37:59,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. + 4: [2023-05-25 13:37:59,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:37:59,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:37:59,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +18: [2023-05-25 13:37:59,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. + 9: [2023-05-25 13:37:59,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:37:59,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:37:59,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:37:59,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:37:59,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +16: [2023-05-25 13:37:59,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +22: [2023-05-25 13:37:59,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:37:59,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +16: [2023-05-25 13:37:59,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:37:59,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +16: [2023-05-25 13:37:59,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... + 9: [2023-05-25 13:37:59,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +16: [2023-05-25 13:37:59,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... + 9: [2023-05-25 13:37:59,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +16: [2023-05-25 13:37:59,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +16: [2023-05-25 13:37:59,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +16: [2023-05-25 13:37:59,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:37:59,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +16: [2023-05-25 13:37:59,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +22: [2023-05-25 13:37:59,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... + 9: [2023-05-25 13:37:59,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +22: [2023-05-25 13:37:59,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:37:59,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +22: [2023-05-25 13:37:59,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... + 9: [2023-05-25 13:37:59,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... + 9: [2023-05-25 13:37:59,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... + 9: [2023-05-25 13:37:59,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... + 1: [2023-05-25 13:37:59,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. +20: [2023-05-25 13:37:59,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:37:59,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:37:59,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:37:59,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:37:59,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:37:59,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:37:59,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:37:59,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +26: [2023-05-25 13:37:59,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:37:59,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:37:59,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. + 5: [2023-05-25 13:37:59,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. +26: [2023-05-25 13:37:59,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:37:59,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:37:59,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:37:59,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:37:59,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +18: [2023-05-25 13:37:59,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +18: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. +11: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +20: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +20: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +27: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:37:59,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +26: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +27: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +20: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +27: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +20: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +20: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +26: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +20: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +26: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +20: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +29: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. + 7: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:37:59,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. +26: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +26: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +29: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. + 2: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:37:59,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. +25: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +11: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +25: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +17: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +31: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:37:59,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +11: [2023-05-25 13:37:59,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +11: [2023-05-25 13:37:59,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +28: [2023-05-25 13:37:59,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:37:59,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:37:59,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:37:59,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:37:59,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +11: [2023-05-25 13:37:59,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +28: [2023-05-25 13:37:59,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:37:59,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:37:59,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +11: [2023-05-25 13:37:59,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +11: [2023-05-25 13:37:59,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +17: [2023-05-25 13:37:59,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +29: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +11: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +27: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +29: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +11: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +29: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... + 3: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:37:59,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. +29: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +29: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +29: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... + 6: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. +29: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +10: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +29: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +10: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +31: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +31: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +28: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +28: [2023-05-25 13:37:59,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +28: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +28: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +28: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +30: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +25: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +31: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +25: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +25: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +28: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +31: [2023-05-25 13:37:59,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... +28: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +27: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +27: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +25: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +31: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... + 5: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +31: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +31: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +25: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +27: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +27: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +25: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +27: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 5: [2023-05-25 13:37:59,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +14: [2023-05-25 13:37:59,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 6: [2023-05-25 13:37:59,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... +30: [2023-05-25 13:37:59,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt... + 6: [2023-05-25 13:37:59,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 6: [2023-05-25 13:37:59,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt... + 0: [2023-05-25 13:37:59,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:37:59,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... + 6: [2023-05-25 13:37:59,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt... +30: [2023-05-25 13:37:59,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:37:59,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt... +30: [2023-05-25 13:37:59,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +30: [2023-05-25 13:37:59,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt... +30: [2023-05-25 13:37:59,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt... + 8: [2023-05-25 13:37:59,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:37:59,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:37:59,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:37:59,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:37:59,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:37:59,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:37:59,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:37:59,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:37:59,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +23: [2023-05-25 13:37:59,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. + 0: [2023-05-25 13:37:59,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:37:59,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +24: [2023-05-25 13:37:59,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:37:59,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +17: [2023-05-25 13:37:59,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +17: [2023-05-25 13:37:59,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. + 8: [2023-05-25 13:37:59,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... + 7: [2023-05-25 13:37:59,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... + 8: [2023-05-25 13:37:59,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... + 7: [2023-05-25 13:37:59,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +10: [2023-05-25 13:37:59,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:37:59,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... + 8: [2023-05-25 13:37:59,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... + 8: [2023-05-25 13:37:59,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +22: [2023-05-25 13:37:59,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:37:59,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +14: [2023-05-25 13:37:59,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +19: [2023-05-25 13:37:59,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:37:59,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:37:59,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:37:59,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:37:59,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:37:59,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:37:59,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:37:59,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:37:59,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:37:59,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:37:59,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:37:59,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:37:59,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:37:59,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:37:59,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:37:59,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:37:59,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:37:59,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +14: [2023-05-25 13:37:59,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +13: [2023-05-25 13:37:59,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:37:59,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:37:59,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +19: [2023-05-25 13:37:59,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:37:59,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:37:59,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +13: [2023-05-25 13:37:59,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:37:59,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +19: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +19: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +19: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +19: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +19: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +19: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +13: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +23: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +24: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:37:59,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +21: [2023-05-25 13:37:59,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:37:59,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:37:59,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +21: [2023-05-25 13:37:59,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +21: [2023-05-25 13:37:59,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt... +21: [2023-05-25 13:37:59,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt... +21: [2023-05-25 13:37:59,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +13: [2023-05-25 13:37:59,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +21: [2023-05-25 13:37:59,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt... +13: [2023-05-25 13:37:59,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +13: [2023-05-25 13:37:59,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +13: [2023-05-25 13:37:59,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +13: [2023-05-25 13:37:59,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:37:59,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +13: [2023-05-25 13:37:59,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +13: [2023-05-25 13:37:59,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +16: [2023-05-25 13:37:59,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +12: [2023-05-25 13:37:59,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:37:59,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:37:59,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:37:59,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:37:59,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:37:59,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:37:59,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +22: [2023-05-25 13:37:59,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:37:59,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +11: [2023-05-25 13:37:59,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:37:59,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +26: [2023-05-25 13:37:59,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +20: [2023-05-25 13:37:59,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +26: [2023-05-25 13:37:59,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. + 9: [2023-05-25 13:37:59,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +29: [2023-05-25 13:37:59,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:37:59,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +15: [2023-05-25 13:37:59,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:37:59,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:37:59,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:37:59,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +25: [2023-05-25 13:37:59,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +20: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +15: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +12: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +15: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +12: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +22: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +28: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:37:59,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:37:59,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +15: [2023-05-25 13:37:59,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +25: [2023-05-25 13:37:59,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +12: [2023-05-25 13:37:59,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:37:59,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +12: [2023-05-25 13:37:59,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +12: [2023-05-25 13:37:59,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +15: [2023-05-25 13:37:59,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +15: [2023-05-25 13:37:59,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +27: [2023-05-25 13:37:59,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:37:59,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +15: [2023-05-25 13:37:59,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +15: [2023-05-25 13:37:59,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt... +15: [2023-05-25 13:37:59,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:37:59,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt... +15: [2023-05-25 13:37:59,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt... +11: [2023-05-25 13:37:59,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +30: [2023-05-25 13:37:59,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:37:59,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:37:59,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_00-model_states.pt. + 6: [2023-05-25 13:37:59,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. +20: [2023-05-25 13:37:59,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_00-model_states.pt. +26: [2023-05-25 13:37:59,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:37:59,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:37:59,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +11: [2023-05-25 13:37:59,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:37:59,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +29: [2023-05-25 13:37:59,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +20: [2023-05-25 13:37:59,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +28: [2023-05-25 13:37:59,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:37:59,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:37:59,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:37:59,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:37:59,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +19: [2023-05-25 13:37:59,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +25: [2023-05-25 13:37:59,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +13: [2023-05-25 13:37:59,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +21: [2023-05-25 13:37:59,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +30: [2023-05-25 13:37:59,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:37:59,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:37:59,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:37:59,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +19: [2023-05-25 13:37:59,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:37:59,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_00-model_states.pt. + 8: [2023-05-25 13:37:59,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:37:59,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:37:59,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +19: [2023-05-25 13:37:59,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +21: [2023-05-25 13:37:59,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +13: [2023-05-25 13:37:59,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +21: [2023-05-25 13:37:59,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:37:59,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +15: [2023-05-25 13:37:59,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:37:59,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:37:59,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:37:59,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:37:59,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:37:59,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:37:59,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:37:59,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:37:59,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 4: [2023-05-25 13:37:59,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 4: [2023-05-25 13:37:59,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. + 4: [2023-05-25 13:37:59,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. + 4: [2023-05-25 13:37:59,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 1: [2023-05-25 13:37:59,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 5: [2023-05-25 13:37:59,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 5: [2023-05-25 13:37:59,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 1: [2023-05-25 13:37:59,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 1: [2023-05-25 13:37:59,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 2: [2023-05-25 13:37:59,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 2: [2023-05-25 13:37:59,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 1: [2023-05-25 13:37:59,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 5: [2023-05-25 13:37:59,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:37:59,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 5: [2023-05-25 13:37:59,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 5: [2023-05-25 13:37:59,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 1: [2023-05-25 13:37:59,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:37:59,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 2: [2023-05-25 13:37:59,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,948] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 0: [2023-05-25 13:37:59,948] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. +17: [2023-05-25 13:37:59,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. +17: [2023-05-25 13:37:59,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. + 0: [2023-05-25 13:37:59,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 0: [2023-05-25 13:37:59,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 2: [2023-05-25 13:37:59,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:37:59,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. + 3: [2023-05-25 13:37:59,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. +28: [2023-05-25 13:37:59,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +28: [2023-05-25 13:37:59,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +23: [2023-05-25 13:37:59,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. + 7: [2023-05-25 13:37:59,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:37:59,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. + 5: [2023-05-25 13:37:59,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:37:59,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +17: [2023-05-25 13:37:59,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:37:59,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 0: [2023-05-25 13:37:59,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 4: [2023-05-25 13:37:59,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 0: [2023-05-25 13:37:59,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +28: [2023-05-25 13:37:59,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:37:59,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +23: [2023-05-25 13:37:59,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:37:59,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 6: [2023-05-25 13:37:59,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 3: [2023-05-25 13:37:59,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. + 7: [2023-05-25 13:37:59,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. + 2: [2023-05-25 13:37:59,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 2: [2023-05-25 13:37:59,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 8: [2023-05-25 13:37:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. + 8: [2023-05-25 13:37:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. +14: [2023-05-25 13:37:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +14: [2023-05-25 13:37:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. + 3: [2023-05-25 13:37:59,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +27: [2023-05-25 13:37:59,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. + 4: [2023-05-25 13:37:59,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:37:59,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +11: [2023-05-25 13:37:59,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. +11: [2023-05-25 13:37:59,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. +13: [2023-05-25 13:37:59,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. + 7: [2023-05-25 13:37:59,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. +27: [2023-05-25 13:37:59,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +13: [2023-05-25 13:37:59,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. + 9: [2023-05-25 13:37:59,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. + 9: [2023-05-25 13:37:59,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. + 7: [2023-05-25 13:37:59,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 3: [2023-05-25 13:37:59,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. + 3: [2023-05-25 13:37:59,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_03-model_states.pt. +16: [2023-05-25 13:37:59,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. +24: [2023-05-25 13:37:59,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +16: [2023-05-25 13:37:59,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. +24: [2023-05-25 13:37:59,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +15: [2023-05-25 13:37:59,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. +15: [2023-05-25 13:37:59,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. +10: [2023-05-25 13:37:59,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. +10: [2023-05-25 13:37:59,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. +24: [2023-05-25 13:37:59,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:37:59,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +18: [2023-05-25 13:37:59,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. +18: [2023-05-25 13:37:59,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. + 8: [2023-05-25 13:37:59,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +27: [2023-05-25 13:37:59,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:37:59,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +12: [2023-05-25 13:37:59,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. + 2: [2023-05-25 13:37:59,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:37:59,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +27: [2023-05-25 13:37:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +12: [2023-05-25 13:37:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. + 8: [2023-05-25 13:37:59,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:37:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. + 6: [2023-05-25 13:37:59,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +17: [2023-05-25 13:37:59,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. + 2: [2023-05-25 13:37:59,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:37:59,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. +14: [2023-05-25 13:37:59,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +17: [2023-05-25 13:37:59,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +14: [2023-05-25 13:37:59,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +27: [2023-05-25 13:37:59,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +16: [2023-05-25 13:37:59,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +16: [2023-05-25 13:37:59,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +11: [2023-05-25 13:37:59,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +27: [2023-05-25 13:37:59,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +11: [2023-05-25 13:37:59,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:37:59,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. +13: [2023-05-25 13:37:59,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:37:59,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:37:59,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +27: [2023-05-25 13:37:59,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:37:59,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +13: [2023-05-25 13:37:59,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. +24: [2023-05-25 13:37:59,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. + 7: [2023-05-25 13:37:59,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +10: [2023-05-25 13:37:59,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:37:59,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +19: [2023-05-25 13:37:59,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. +19: [2023-05-25 13:37:59,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. + 7: [2023-05-25 13:37:59,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 6: [2023-05-25 13:37:59,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. +14: [2023-05-25 13:37:59,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. +26: [2023-05-25 13:37:59,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 6: [2023-05-25 13:37:59,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_02-model_states.pt. +26: [2023-05-25 13:37:59,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +14: [2023-05-25 13:37:59,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_02-model_states.pt. +27: [2023-05-25 13:37:59,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +15: [2023-05-25 13:37:59,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +30: [2023-05-25 13:37:59,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +28: [2023-05-25 13:37:59,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:37:59,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 3: [2023-05-25 13:37:59,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +16: [2023-05-25 13:37:59,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +15: [2023-05-25 13:37:59,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +23: [2023-05-25 13:37:59,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +23: [2023-05-25 13:37:59,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +26: [2023-05-25 13:37:59,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +16: [2023-05-25 13:37:59,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +30: [2023-05-25 13:37:59,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:37:59,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +18: [2023-05-25 13:37:59,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +28: [2023-05-25 13:38:00,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +22: [2023-05-25 13:38:00,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. +22: [2023-05-25 13:38:00,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. +12: [2023-05-25 13:38:00,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +17: [2023-05-25 13:38:00,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +31: [2023-05-25 13:38:00,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +17: [2023-05-25 13:38:00,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +31: [2023-05-25 13:38:00,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +13: [2023-05-25 13:38:00,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +20: [2023-05-25 13:38:00,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +20: [2023-05-25 13:38:00,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +20: [2023-05-25 13:38:00,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. +20: [2023-05-25 13:38:00,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. +31: [2023-05-25 13:38:00,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +31: [2023-05-25 13:38:00,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +14: [2023-05-25 13:38:00,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. +24: [2023-05-25 13:38:00,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +16: [2023-05-25 13:38:00,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:00,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +13: [2023-05-25 13:38:00,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +29: [2023-05-25 13:38:00,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +24: [2023-05-25 13:38:00,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +21: [2023-05-25 13:38:00,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +31: [2023-05-25 13:38:00,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +31: [2023-05-25 13:38:00,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +29: [2023-05-25 13:38:00,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +18: [2023-05-25 13:38:00,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +13: [2023-05-25 13:38:00,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +14: [2023-05-25 13:38:00,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +19: [2023-05-25 13:38:00,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +14: [2023-05-25 13:38:00,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +28: [2023-05-25 13:38:00,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +23: [2023-05-25 13:38:00,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:38:00,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +14: [2023-05-25 13:38:00,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. + 6: [2023-05-25 13:38:00,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +30: [2023-05-25 13:38:00,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +28: [2023-05-25 13:38:00,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... + 6: [2023-05-25 13:38:00,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +22: [2023-05-25 13:38:00,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +30: [2023-05-25 13:38:00,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +21: [2023-05-25 13:38:00,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +30: [2023-05-25 13:38:00,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +30: [2023-05-25 13:38:00,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +21: [2023-05-25 13:38:00,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. +13: [2023-05-25 13:38:00,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +22: [2023-05-25 13:38:00,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +29: [2023-05-25 13:38:00,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +21: [2023-05-25 13:38:00,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_02-model_states.pt. +14: [2023-05-25 13:38:00,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +21: [2023-05-25 13:38:00,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:00,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. +31: [2023-05-25 13:38:00,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +20: [2023-05-25 13:38:00,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +29: [2023-05-25 13:38:00,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. +31: [2023-05-25 13:38:00,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +20: [2023-05-25 13:38:00,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:00,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +30: [2023-05-25 13:38:00,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +29: [2023-05-25 13:38:00,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +13: [2023-05-25 13:38:00,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +29: [2023-05-25 13:38:00,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. + 8: [2023-05-25 13:38:00,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. + 8: [2023-05-25 13:38:00,022] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. +25: [2023-05-25 13:38:00,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +14: [2023-05-25 13:38:00,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. + 3: [2023-05-25 13:38:00,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. +25: [2023-05-25 13:38:00,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:38:00,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:00,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:00,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +25: [2023-05-25 13:38:00,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +15: [2023-05-25 13:38:00,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. +25: [2023-05-25 13:38:00,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +15: [2023-05-25 13:38:00,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. +26: [2023-05-25 13:38:00,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +25: [2023-05-25 13:38:00,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +26: [2023-05-25 13:38:00,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +24: [2023-05-25 13:38:00,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. + 2: [2023-05-25 13:38:00,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. +28: [2023-05-25 13:38:00,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +21: [2023-05-25 13:38:00,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +25: [2023-05-25 13:38:00,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +19: [2023-05-25 13:38:00,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +30: [2023-05-25 13:38:00,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +30: [2023-05-25 13:38:00,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +21: [2023-05-25 13:38:00,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +21: [2023-05-25 13:38:00,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +27: [2023-05-25 13:38:00,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:00,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +27: [2023-05-25 13:38:00,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +10: [2023-05-25 13:38:00,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. +28: [2023-05-25 13:38:00,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +10: [2023-05-25 13:38:00,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +10: [2023-05-25 13:38:00,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. +10: [2023-05-25 13:38:00,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +27: [2023-05-25 13:38:00,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +27: [2023-05-25 13:38:00,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +18: [2023-05-25 13:38:00,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +27: [2023-05-25 13:38:00,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +18: [2023-05-25 13:38:00,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +28: [2023-05-25 13:38:00,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +15: [2023-05-25 13:38:00,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +26: [2023-05-25 13:38:00,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +15: [2023-05-25 13:38:00,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +27: [2023-05-25 13:38:00,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +24: [2023-05-25 13:38:00,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +24: [2023-05-25 13:38:00,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:00,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:38:00,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +22: [2023-05-25 13:38:00,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +24: [2023-05-25 13:38:00,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +22: [2023-05-25 13:38:00,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +25: [2023-05-25 13:38:00,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +27: [2023-05-25 13:38:00,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +30: [2023-05-25 13:38:00,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +30: [2023-05-25 13:38:00,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +19: [2023-05-25 13:38:00,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +25: [2023-05-25 13:38:00,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +19: [2023-05-25 13:38:00,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +25: [2023-05-25 13:38:00,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:00,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:38:00,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:00,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:00,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +26: [2023-05-25 13:38:00,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +29: [2023-05-25 13:38:00,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +26: [2023-05-25 13:38:00,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +29: [2023-05-25 13:38:00,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +26: [2023-05-25 13:38:00,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +24: [2023-05-25 13:38:00,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +22: [2023-05-25 13:38:00,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +28: [2023-05-25 13:38:00,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:00,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +10: [2023-05-25 13:38:00,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +28: [2023-05-25 13:38:00,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +10: [2023-05-25 13:38:00,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. +10: [2023-05-25 13:38:00,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. +10: [2023-05-25 13:38:00,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +27: [2023-05-25 13:38:00,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:00,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:00,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:00,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +24: [2023-05-25 13:38:00,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +18: [2023-05-25 13:38:00,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:00,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +31: [2023-05-25 13:38:00,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:00,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +18: [2023-05-25 13:38:00,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:00,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +24: [2023-05-25 13:38:00,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +28: [2023-05-25 13:38:00,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +11: [2023-05-25 13:38:00,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +11: [2023-05-25 13:38:00,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +11: [2023-05-25 13:38:00,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +27: [2023-05-25 13:38:00,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +18: [2023-05-25 13:38:00,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +12: [2023-05-25 13:38:00,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. +12: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. +18: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... +18: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... +15: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +11: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +27: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +28: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +18: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +15: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. + 1: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. + 1: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. +18: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +31: [2023-05-25 13:38:00,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. +27: [2023-05-25 13:38:00,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +12: [2023-05-25 13:38:00,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +12: [2023-05-25 13:38:00,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +11: [2023-05-25 13:38:00,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. +31: [2023-05-25 13:38:00,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_03-model_states.pt. + 0: [2023-05-25 13:38:00,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_01-model_01-model_states.pt. + 1: [2023-05-25 13:38:00,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... +16: [2023-05-25 13:38:00,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:00,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:00,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +11: [2023-05-25 13:38:00,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +29: [2023-05-25 13:38:00,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... +11: [2023-05-25 13:38:00,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +30: [2023-05-25 13:38:00,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +16: [2023-05-25 13:38:00,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... +16: [2023-05-25 13:38:00,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... + 1: [2023-05-25 13:38:00,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +30: [2023-05-25 13:38:00,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +16: [2023-05-25 13:38:00,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +11: [2023-05-25 13:38:00,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +29: [2023-05-25 13:38:00,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +11: [2023-05-25 13:38:00,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +16: [2023-05-25 13:38:00,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +31: [2023-05-25 13:38:00,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +16: [2023-05-25 13:38:00,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +27: [2023-05-25 13:38:00,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +16: [2023-05-25 13:38:00,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... +26: [2023-05-25 13:38:00,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +16: [2023-05-25 13:38:00,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... +26: [2023-05-25 13:38:00,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +16: [2023-05-25 13:38:00,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +30: [2023-05-25 13:38:00,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +16: [2023-05-25 13:38:00,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +25: [2023-05-25 13:38:00,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:00,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +20: [2023-05-25 13:38:00,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:00,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +26: [2023-05-25 13:38:00,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +31: [2023-05-25 13:38:00,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +26: [2023-05-25 13:38:00,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +21: [2023-05-25 13:38:00,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:00,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +31: [2023-05-25 13:38:00,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +31: [2023-05-25 13:38:00,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +20: [2023-05-25 13:38:00,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:00,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +30: [2023-05-25 13:38:00,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +21: [2023-05-25 13:38:00,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +21: [2023-05-25 13:38:00,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +29: [2023-05-25 13:38:00,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +22: [2023-05-25 13:38:00,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +31: [2023-05-25 13:38:00,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:00,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +29: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +15: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +17: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +27: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +17: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +29: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +17: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +27: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +20: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +25: [2023-05-25 13:38:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +12: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +29: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +23: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +25: [2023-05-25 13:38:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +21: [2023-05-25 13:38:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +25: [2023-05-25 13:38:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. + 3: [2023-05-25 13:38:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +22: [2023-05-25 13:38:00,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +20: [2023-05-25 13:38:00,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... +24: [2023-05-25 13:38:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +17: [2023-05-25 13:38:00,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +24: [2023-05-25 13:38:00,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_02-model_states.pt. +29: [2023-05-25 13:38:00,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +20: [2023-05-25 13:38:00,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... +23: [2023-05-25 13:38:00,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:00,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +20: [2023-05-25 13:38:00,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +13: [2023-05-25 13:38:00,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +23: [2023-05-25 13:38:00,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... + 4: [2023-05-25 13:38:00,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +13: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +17: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +22: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... +17: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... + 4: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +17: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +25: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +17: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +12: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +23: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +23: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +12: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +23: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... +23: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... + 4: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +13: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +21: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +13: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +21: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:00,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +13: [2023-05-25 13:38:00,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +30: [2023-05-25 13:38:00,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +19: [2023-05-25 13:38:00,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:00,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +11: [2023-05-25 13:38:00,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +19: [2023-05-25 13:38:00,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:00,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +11: [2023-05-25 13:38:00,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +13: [2023-05-25 13:38:00,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... + 3: [2023-05-25 13:38:00,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... +13: [2023-05-25 13:38:00,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... +31: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. +31: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_36-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... + 4: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... +21: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... +21: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... + 4: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... +13: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +21: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +19: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +21: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +22: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +13: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +13: [2023-05-25 13:38:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... +30: [2023-05-25 13:38:00,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +13: [2023-05-25 13:38:00,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +13: [2023-05-25 13:38:00,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +30: [2023-05-25 13:38:00,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +12: [2023-05-25 13:38:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +22: [2023-05-25 13:38:00,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +30: [2023-05-25 13:38:00,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. +19: [2023-05-25 13:38:00,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... +19: [2023-05-25 13:38:00,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt... + 9: [2023-05-25 13:38:00,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +22: [2023-05-25 13:38:00,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +22: [2023-05-25 13:38:00,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_03-model_states.pt. +17: [2023-05-25 13:38:00,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +17: [2023-05-25 13:38:00,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +24: [2023-05-25 13:38:00,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... +24: [2023-05-25 13:38:00,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:00,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:00,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +23: [2023-05-25 13:38:00,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +29: [2023-05-25 13:38:00,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +11: [2023-05-25 13:38:00,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. + 0: [2023-05-25 13:38:00,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... +23: [2023-05-25 13:38:00,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +19: [2023-05-25 13:38:00,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:00,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +11: [2023-05-25 13:38:00,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_14-model_03-model_states.pt. + 0: [2023-05-25 13:38:00,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... + 0: [2023-05-25 13:38:00,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... +26: [2023-05-25 13:38:00,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +20: [2023-05-25 13:38:00,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +20: [2023-05-25 13:38:00,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +29: [2023-05-25 13:38:00,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +18: [2023-05-25 13:38:00,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +25: [2023-05-25 13:38:00,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +19: [2023-05-25 13:38:00,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +19: [2023-05-25 13:38:00,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +19: [2023-05-25 13:38:00,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +19: [2023-05-25 13:38:00,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +24: [2023-05-25 13:38:00,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +24: [2023-05-25 13:38:00,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +25: [2023-05-25 13:38:00,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:00,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +25: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +21: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. + 5: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +21: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +29: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... + 9: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +29: [2023-05-25 13:38:00,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +26: [2023-05-25 13:38:00,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +25: [2023-05-25 13:38:00,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +11: [2023-05-25 13:38:00,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +31: [2023-05-25 13:38:00,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +26: [2023-05-25 13:38:00,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +31: [2023-05-25 13:38:00,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt... +18: [2023-05-25 13:38:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +28: [2023-05-25 13:38:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... +25: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... + 9: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... +10: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +10: [2023-05-25 13:38:00,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +25: [2023-05-25 13:38:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +10: [2023-05-25 13:38:00,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... + 9: [2023-05-25 13:38:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +25: [2023-05-25 13:38:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... + 9: [2023-05-25 13:38:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +30: [2023-05-25 13:38:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +28: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +28: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +28: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +18: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +30: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +10: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... +10: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... +10: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +10: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +14: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +10: [2023-05-25 13:38:00,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +14: [2023-05-25 13:38:00,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +22: [2023-05-25 13:38:00,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +14: [2023-05-25 13:38:00,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +22: [2023-05-25 13:38:00,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +27: [2023-05-25 13:38:00,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +17: [2023-05-25 13:38:00,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:00,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... + 2: [2023-05-25 13:38:00,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... +11: [2023-05-25 13:38:00,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... +23: [2023-05-25 13:38:00,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:38:00,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +11: [2023-05-25 13:38:00,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +30: [2023-05-25 13:38:00,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +20: [2023-05-25 13:38:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +22: [2023-05-25 13:38:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +21: [2023-05-25 13:38:00,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +14: [2023-05-25 13:38:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... + 1: [2023-05-25 13:38:00,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +14: [2023-05-25 13:38:00,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... +18: [2023-05-25 13:38:00,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +14: [2023-05-25 13:38:00,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +14: [2023-05-25 13:38:00,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... +11: [2023-05-25 13:38:00,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +19: [2023-05-25 13:38:00,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:00,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +14: [2023-05-25 13:38:00,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +14: [2023-05-25 13:38:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... + 2: [2023-05-25 13:38:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... +21: [2023-05-25 13:38:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... +21: [2023-05-25 13:38:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +27: [2023-05-25 13:38:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +18: [2023-05-25 13:38:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:00,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +26: [2023-05-25 13:38:00,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +13: [2023-05-25 13:38:00,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +27: [2023-05-25 13:38:00,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +18: [2023-05-25 13:38:00,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +15: [2023-05-25 13:38:00,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +26: [2023-05-25 13:38:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... +26: [2023-05-25 13:38:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +11: [2023-05-25 13:38:00,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +29: [2023-05-25 13:38:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:00,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +23: [2023-05-25 13:38:00,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:00,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:00,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +15: [2023-05-25 13:38:00,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +29: [2023-05-25 13:38:00,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +16: [2023-05-25 13:38:00,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +29: [2023-05-25 13:38:00,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +26: [2023-05-25 13:38:00,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +26: [2023-05-25 13:38:00,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +26: [2023-05-25 13:38:00,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +26: [2023-05-25 13:38:00,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +16: [2023-05-25 13:38:00,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +15: [2023-05-25 13:38:00,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +16: [2023-05-25 13:38:00,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +17: [2023-05-25 13:38:00,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +25: [2023-05-25 13:38:00,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:00,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +18: [2023-05-25 13:38:00,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +21: [2023-05-25 13:38:00,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +15: [2023-05-25 13:38:00,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... + 8: [2023-05-25 13:38:00,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +31: [2023-05-25 13:38:00,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:00,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +15: [2023-05-25 13:38:00,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +27: [2023-05-25 13:38:00,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +15: [2023-05-25 13:38:00,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +20: [2023-05-25 13:38:00,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +30: [2023-05-25 13:38:00,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... + 8: [2023-05-25 13:38:00,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +18: [2023-05-25 13:38:00,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +18: [2023-05-25 13:38:00,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +31: [2023-05-25 13:38:00,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... + 1: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +17: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... + 8: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... +25: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +25: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +20: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +30: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +27: [2023-05-25 13:38:00,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +27: [2023-05-25 13:38:00,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... + 3: [2023-05-25 13:38:00,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +30: [2023-05-25 13:38:00,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +19: [2023-05-25 13:38:00,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:00,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +30: [2023-05-25 13:38:00,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +11: [2023-05-25 13:38:00,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +31: [2023-05-25 13:38:00,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +12: [2023-05-25 13:38:00,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... + 1: [2023-05-25 13:38:00,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... +12: [2023-05-25 13:38:00,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... +15: [2023-05-25 13:38:00,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... +31: [2023-05-25 13:38:00,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt... +13: [2023-05-25 13:38:00,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +19: [2023-05-25 13:38:00,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +12: [2023-05-25 13:38:00,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +16: [2023-05-25 13:38:00,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +12: [2023-05-25 13:38:00,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... +12: [2023-05-25 13:38:00,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... +12: [2023-05-25 13:38:00,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +12: [2023-05-25 13:38:00,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt... +16: [2023-05-25 13:38:00,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +11: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +22: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +22: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_25-model_01-model_states.pt. +24: [2023-05-25 13:38:00,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +23: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:00,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +12: [2023-05-25 13:38:00,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +21: [2023-05-25 13:38:00,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:00,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +20: [2023-05-25 13:38:00,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +18: [2023-05-25 13:38:00,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +24: [2023-05-25 13:38:00,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:00,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +11: [2023-05-25 13:38:00,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... +17: [2023-05-25 13:38:00,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +24: [2023-05-25 13:38:00,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt... +18: [2023-05-25 13:38:00,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +11: [2023-05-25 13:38:00,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... + 6: [2023-05-25 13:38:00,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... + 6: [2023-05-25 13:38:00,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... +10: [2023-05-25 13:38:00,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +25: [2023-05-25 13:38:00,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +13: [2023-05-25 13:38:00,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +25: [2023-05-25 13:38:00,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +10: [2023-05-25 13:38:00,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +20: [2023-05-25 13:38:00,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +14: [2023-05-25 13:38:00,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... +25: [2023-05-25 13:38:00,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... + 3: [2023-05-25 13:38:00,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +17: [2023-05-25 13:38:00,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +31: [2023-05-25 13:38:00,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +19: [2023-05-25 13:38:00,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +14: [2023-05-25 13:38:00,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +31: [2023-05-25 13:38:00,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_00-model_states.pt. +19: [2023-05-25 13:38:00,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +22: [2023-05-25 13:38:00,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +31: [2023-05-25 13:38:00,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +31: [2023-05-25 13:38:00,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt... +22: [2023-05-25 13:38:00,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:38:00,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:00,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:00,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +15: [2023-05-25 13:38:00,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +23: [2023-05-25 13:38:00,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:00,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... +17: [2023-05-25 13:38:00,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +17: [2023-05-25 13:38:00,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... +23: [2023-05-25 13:38:00,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... +20: [2023-05-25 13:38:00,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:00,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +10: [2023-05-25 13:38:00,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:00,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +10: [2023-05-25 13:38:00,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +21: [2023-05-25 13:38:00,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +21: [2023-05-25 13:38:00,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +14: [2023-05-25 13:38:00,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +20: [2023-05-25 13:38:00,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +11: [2023-05-25 13:38:00,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +22: [2023-05-25 13:38:00,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +15: [2023-05-25 13:38:00,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +20: [2023-05-25 13:38:00,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:00,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:00,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... +21: [2023-05-25 13:38:00,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... +19: [2023-05-25 13:38:00,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +11: [2023-05-25 13:38:00,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt... +19: [2023-05-25 13:38:00,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:00,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... + 9: [2023-05-25 13:38:00,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt... + 8: [2023-05-25 13:38:00,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +19: [2023-05-25 13:38:00,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt... +20: [2023-05-25 13:38:00,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... +22: [2023-05-25 13:38:00,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt... +19: [2023-05-25 13:38:00,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +12: [2023-05-25 13:38:00,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +16: [2023-05-25 13:38:00,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +16: [2023-05-25 13:38:00,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:00,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +16: [2023-05-25 13:38:00,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... +16: [2023-05-25 13:38:00,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... + 6: [2023-05-25 13:38:00,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +18: [2023-05-25 13:38:00,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... +18: [2023-05-25 13:38:00,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:00,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... +12: [2023-05-25 13:38:00,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:00,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +22: [2023-05-25 13:38:00,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:00,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... +22: [2023-05-25 13:38:00,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_00-model_states.pt. +22: [2023-05-25 13:38:00,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +30: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +27: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. + 4: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. + 4: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. +30: [2023-05-25 13:38:00,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +29: [2023-05-25 13:38:00,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +29: [2023-05-25 13:38:00,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +30: [2023-05-25 13:38:00,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:00,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:00,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +28: [2023-05-25 13:38:00,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. + 4: [2023-05-25 13:38:00,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:00,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. + 1: [2023-05-25 13:38:00,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. +30: [2023-05-25 13:38:00,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:00,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +25: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +28: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +24: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +24: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +26: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +26: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +31: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +31: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_02-model_states.pt. +25: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +31: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +31: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +24: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +31: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +24: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. + 0: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. +31: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +20: [2023-05-25 13:38:00,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +20: [2023-05-25 13:38:00,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +28: [2023-05-25 13:38:00,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:00,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:00,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:00,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. + 2: [2023-05-25 13:38:00,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. +16: [2023-05-25 13:38:00,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +16: [2023-05-25 13:38:00,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +26: [2023-05-25 13:38:00,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:00,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +11: [2023-05-25 13:38:00,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +26: [2023-05-25 13:38:00,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:00,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:00,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +27: [2023-05-25 13:38:00,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +27: [2023-05-25 13:38:00,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +27: [2023-05-25 13:38:00,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +20: [2023-05-25 13:38:00,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +28: [2023-05-25 13:38:00,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:00,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +23: [2023-05-25 13:38:00,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +23: [2023-05-25 13:38:00,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +20: [2023-05-25 13:38:00,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:00,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:00,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:00,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +27: [2023-05-25 13:38:00,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +11: [2023-05-25 13:38:00,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +28: [2023-05-25 13:38:00,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +28: [2023-05-25 13:38:00,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +13: [2023-05-25 13:38:00,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +13: [2023-05-25 13:38:00,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. + 6: [2023-05-25 13:38:00,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. + 0: [2023-05-25 13:38:00,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. +12: [2023-05-25 13:38:00,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. + 0: [2023-05-25 13:38:00,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. + 6: [2023-05-25 13:38:00,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. +16: [2023-05-25 13:38:00,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +16: [2023-05-25 13:38:00,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +12: [2023-05-25 13:38:00,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +24: [2023-05-25 13:38:00,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:00,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +11: [2023-05-25 13:38:00,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +24: [2023-05-25 13:38:00,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +24: [2023-05-25 13:38:00,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +21: [2023-05-25 13:38:00,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +21: [2023-05-25 13:38:00,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +23: [2023-05-25 13:38:00,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:00,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:00,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +19: [2023-05-25 13:38:00,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +11: [2023-05-25 13:38:00,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. + 3: [2023-05-25 13:38:00,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. +17: [2023-05-25 13:38:00,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +17: [2023-05-25 13:38:00,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +31: [2023-05-25 13:38:00,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:00,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:00,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:00,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +13: [2023-05-25 13:38:00,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +30: [2023-05-25 13:38:00,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:00,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:00,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:00,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:00,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +13: [2023-05-25 13:38:00,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +25: [2023-05-25 13:38:00,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +13: [2023-05-25 13:38:00,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:00,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +29: [2023-05-25 13:38:00,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:00,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:00,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:00,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +18: [2023-05-25 13:38:00,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +12: [2023-05-25 13:38:00,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +31: [2023-05-25 13:38:00,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +18: [2023-05-25 13:38:00,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. +25: [2023-05-25 13:38:00,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. +30: [2023-05-25 13:38:00,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +25: [2023-05-25 13:38:00,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. +30: [2023-05-25 13:38:00,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:00,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +25: [2023-05-25 13:38:00,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +24: [2023-05-25 13:38:00,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:00,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +30: [2023-05-25 13:38:00,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +24: [2023-05-25 13:38:00,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:00,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:00,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:00,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +29: [2023-05-25 13:38:00,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +22: [2023-05-25 13:38:00,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. + 0: [2023-05-25 13:38:00,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +22: [2023-05-25 13:38:00,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_02-model_states.pt. + 6: [2023-05-25 13:38:00,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +24: [2023-05-25 13:38:00,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +26: [2023-05-25 13:38:00,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:00,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:00,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +12: [2023-05-25 13:38:00,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. +31: [2023-05-25 13:38:00,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +21: [2023-05-25 13:38:00,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +28: [2023-05-25 13:38:00,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. +28: [2023-05-25 13:38:00,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. + 3: [2023-05-25 13:38:00,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. +26: [2023-05-25 13:38:00,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +26: [2023-05-25 13:38:00,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... + 2: [2023-05-25 13:38:00,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. +21: [2023-05-25 13:38:00,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:00,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:00,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. +17: [2023-05-25 13:38:00,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:00,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. + 9: [2023-05-25 13:38:00,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. + 9: [2023-05-25 13:38:00,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +31: [2023-05-25 13:38:00,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. + 5: [2023-05-25 13:38:00,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. + 5: [2023-05-25 13:38:00,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. + 5: [2023-05-25 13:38:00,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. +31: [2023-05-25 13:38:00,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:00,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:00,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +31: [2023-05-25 13:38:00,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt... +27: [2023-05-25 13:38:00,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +27: [2023-05-25 13:38:00,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +18: [2023-05-25 13:38:00,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +13: [2023-05-25 13:38:00,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +20: [2023-05-25 13:38:00,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +20: [2023-05-25 13:38:00,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +13: [2023-05-25 13:38:00,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +20: [2023-05-25 13:38:00,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. + 7: [2023-05-25 13:38:00,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_02-model_states.pt. + 4: [2023-05-25 13:38:00,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +20: [2023-05-25 13:38:00,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:00,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +18: [2023-05-25 13:38:00,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:00,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +28: [2023-05-25 13:38:00,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:00,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:00,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +26: [2023-05-25 13:38:00,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:00,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. + 0: [2023-05-25 13:38:00,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +20: [2023-05-25 13:38:00,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +20: [2023-05-25 13:38:00,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +10: [2023-05-25 13:38:00,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +24: [2023-05-25 13:38:00,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +24: [2023-05-25 13:38:00,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +24: [2023-05-25 13:38:00,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +20: [2023-05-25 13:38:00,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +20: [2023-05-25 13:38:00,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +16: [2023-05-25 13:38:00,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:00,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +24: [2023-05-25 13:38:00,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +16: [2023-05-25 13:38:00,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +28: [2023-05-25 13:38:00,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +28: [2023-05-25 13:38:00,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. + 9: [2023-05-25 13:38:00,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +10: [2023-05-25 13:38:00,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +16: [2023-05-25 13:38:00,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +26: [2023-05-25 13:38:00,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. + 1: [2023-05-25 13:38:00,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. + 9: [2023-05-25 13:38:00,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +16: [2023-05-25 13:38:00,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +16: [2023-05-25 13:38:00,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:00,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +27: [2023-05-25 13:38:00,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +21: [2023-05-25 13:38:00,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:00,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +23: [2023-05-25 13:38:00,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:00,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:00,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. +16: [2023-05-25 13:38:00,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +21: [2023-05-25 13:38:00,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. +16: [2023-05-25 13:38:00,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... + 6: [2023-05-25 13:38:00,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. + 1: [2023-05-25 13:38:00,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. + 3: [2023-05-25 13:38:00,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +23: [2023-05-25 13:38:00,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:00,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:00,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:00,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +29: [2023-05-25 13:38:00,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:00,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +11: [2023-05-25 13:38:00,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +30: [2023-05-25 13:38:00,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:00,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +10: [2023-05-25 13:38:00,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +25: [2023-05-25 13:38:00,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:00,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +29: [2023-05-25 13:38:00,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +11: [2023-05-25 13:38:00,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +30: [2023-05-25 13:38:00,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:00,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:00,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:00,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +10: [2023-05-25 13:38:00,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +29: [2023-05-25 13:38:00,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +10: [2023-05-25 13:38:00,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +19: [2023-05-25 13:38:00,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:00,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +14: [2023-05-25 13:38:00,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +28: [2023-05-25 13:38:00,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:00,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +14: [2023-05-25 13:38:00,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +23: [2023-05-25 13:38:00,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +24: [2023-05-25 13:38:00,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +23: [2023-05-25 13:38:00,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +19: [2023-05-25 13:38:00,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +24: [2023-05-25 13:38:00,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +19: [2023-05-25 13:38:00,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +24: [2023-05-25 13:38:00,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +30: [2023-05-25 13:38:00,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. + 3: [2023-05-25 13:38:00,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. + 2: [2023-05-25 13:38:00,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. +26: [2023-05-25 13:38:00,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. + 9: [2023-05-25 13:38:00,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +26: [2023-05-25 13:38:00,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. + 7: [2023-05-25 13:38:00,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. + 2: [2023-05-25 13:38:00,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. + 1: [2023-05-25 13:38:00,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_03-model_states.pt. +21: [2023-05-25 13:38:00,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:00,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +17: [2023-05-25 13:38:00,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:00,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +11: [2023-05-25 13:38:00,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +22: [2023-05-25 13:38:00,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +22: [2023-05-25 13:38:00,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +25: [2023-05-25 13:38:00,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:00,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +20: [2023-05-25 13:38:00,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +28: [2023-05-25 13:38:00,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +10: [2023-05-25 13:38:00,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +19: [2023-05-25 13:38:00,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:00,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +10: [2023-05-25 13:38:00,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +29: [2023-05-25 13:38:00,383] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +20: [2023-05-25 13:38:00,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +30: [2023-05-25 13:38:00,383] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +17: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +11: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +17: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +18: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +29: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +21: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +21: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +22: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:00,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +29: [2023-05-25 13:38:00,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +29: [2023-05-25 13:38:00,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:00,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +25: [2023-05-25 13:38:00,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +19: [2023-05-25 13:38:00,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +19: [2023-05-25 13:38:00,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +10: [2023-05-25 13:38:00,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +25: [2023-05-25 13:38:00,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +18: [2023-05-25 13:38:00,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:00,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +17: [2023-05-25 13:38:00,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +25: [2023-05-25 13:38:00,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +18: [2023-05-25 13:38:00,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +14: [2023-05-25 13:38:00,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:00,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +29: [2023-05-25 13:38:00,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:00,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +31: [2023-05-25 13:38:00,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +27: [2023-05-25 13:38:00,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +27: [2023-05-25 13:38:00,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +10: [2023-05-25 13:38:00,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:00,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:00,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:00,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:00,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:00,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:00,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +26: [2023-05-25 13:38:00,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +26: [2023-05-25 13:38:00,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +26: [2023-05-25 13:38:00,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +14: [2023-05-25 13:38:00,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:00,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:00,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +18: [2023-05-25 13:38:00,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +30: [2023-05-25 13:38:00,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +13: [2023-05-25 13:38:00,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... +13: [2023-05-25 13:38:00,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +26: [2023-05-25 13:38:00,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +21: [2023-05-25 13:38:00,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +24: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +13: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... +16: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +24: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +13: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +13: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +23: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +13: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +28: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +13: [2023-05-25 13:38:00,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +16: [2023-05-25 13:38:00,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +11: [2023-05-25 13:38:00,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:00,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:00,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:00,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +18: [2023-05-25 13:38:00,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +14: [2023-05-25 13:38:00,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +20: [2023-05-25 13:38:00,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +28: [2023-05-25 13:38:00,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +15: [2023-05-25 13:38:00,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +10: [2023-05-25 13:38:00,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +22: [2023-05-25 13:38:00,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +22: [2023-05-25 13:38:00,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:00,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +11: [2023-05-25 13:38:00,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +10: [2023-05-25 13:38:00,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +14: [2023-05-25 13:38:00,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. + 2: [2023-05-25 13:38:00,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +23: [2023-05-25 13:38:00,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +11: [2023-05-25 13:38:00,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:00,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_02-model_states.pt. +11: [2023-05-25 13:38:00,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +11: [2023-05-25 13:38:00,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +29: [2023-05-25 13:38:00,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:00,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +15: [2023-05-25 13:38:00,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +22: [2023-05-25 13:38:00,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt... +15: [2023-05-25 13:38:00,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +29: [2023-05-25 13:38:00,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +20: [2023-05-25 13:38:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +31: [2023-05-25 13:38:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +31: [2023-05-25 13:38:00,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +15: [2023-05-25 13:38:00,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +25: [2023-05-25 13:38:00,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. +25: [2023-05-25 13:38:00,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +21: [2023-05-25 13:38:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_03-model_01-model_states.pt. + 7: [2023-05-25 13:38:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +26: [2023-05-25 13:38:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +27: [2023-05-25 13:38:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +19: [2023-05-25 13:38:00,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... + 9: [2023-05-25 13:38:00,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +24: [2023-05-25 13:38:00,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +21: [2023-05-25 13:38:00,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +30: [2023-05-25 13:38:00,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +19: [2023-05-25 13:38:00,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +26: [2023-05-25 13:38:00,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +23: [2023-05-25 13:38:00,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +28: [2023-05-25 13:38:00,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:00,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:00,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:00,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +24: [2023-05-25 13:38:00,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +13: [2023-05-25 13:38:00,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:00,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +16: [2023-05-25 13:38:00,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +23: [2023-05-25 13:38:00,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:00,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +14: [2023-05-25 13:38:00,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +24: [2023-05-25 13:38:00,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +30: [2023-05-25 13:38:00,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +15: [2023-05-25 13:38:00,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +28: [2023-05-25 13:38:00,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +28: [2023-05-25 13:38:00,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +15: [2023-05-25 13:38:00,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +17: [2023-05-25 13:38:00,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +24: [2023-05-25 13:38:00,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +15: [2023-05-25 13:38:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +22: [2023-05-25 13:38:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +14: [2023-05-25 13:38:00,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +29: [2023-05-25 13:38:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +15: [2023-05-25 13:38:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +24: [2023-05-25 13:38:00,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +30: [2023-05-25 13:38:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +11: [2023-05-25 13:38:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +30: [2023-05-25 13:38:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +12: [2023-05-25 13:38:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +12: [2023-05-25 13:38:00,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. + 8: [2023-05-25 13:38:00,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +29: [2023-05-25 13:38:00,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +18: [2023-05-25 13:38:00,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +11: [2023-05-25 13:38:00,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:00,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +11: [2023-05-25 13:38:00,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +29: [2023-05-25 13:38:00,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +11: [2023-05-25 13:38:00,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +21: [2023-05-25 13:38:00,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +14: [2023-05-25 13:38:00,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +18: [2023-05-25 13:38:00,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +14: [2023-05-25 13:38:00,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_03-model_states.pt. +11: [2023-05-25 13:38:00,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... +17: [2023-05-25 13:38:00,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +22: [2023-05-25 13:38:00,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... + 8: [2023-05-25 13:38:00,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... +29: [2023-05-25 13:38:00,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +19: [2023-05-25 13:38:00,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +19: [2023-05-25 13:38:00,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:00,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +10: [2023-05-25 13:38:00,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:00,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:00,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:00,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +28: [2023-05-25 13:38:00,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:00,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +25: [2023-05-25 13:38:00,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +25: [2023-05-25 13:38:00,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. + 7: [2023-05-25 13:38:00,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:00,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +17: [2023-05-25 13:38:00,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +10: [2023-05-25 13:38:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... +10: [2023-05-25 13:38:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +15: [2023-05-25 13:38:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +22: [2023-05-25 13:38:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +10: [2023-05-25 13:38:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:00,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +26: [2023-05-25 13:38:00,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +12: [2023-05-25 13:38:00,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +12: [2023-05-25 13:38:00,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_15-model_01-model_states.pt. +30: [2023-05-25 13:38:00,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:00,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +28: [2023-05-25 13:38:00,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +18: [2023-05-25 13:38:00,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +30: [2023-05-25 13:38:00,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +31: [2023-05-25 13:38:00,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +12: [2023-05-25 13:38:00,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +26: [2023-05-25 13:38:00,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +29: [2023-05-25 13:38:00,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:00,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:00,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:00,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +10: [2023-05-25 13:38:00,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +16: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +16: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +11: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +31: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,432] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +18: [2023-05-25 13:38:00,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +14: [2023-05-25 13:38:00,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +26: [2023-05-25 13:38:00,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +22: [2023-05-25 13:38:00,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +12: [2023-05-25 13:38:00,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:00,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +27: [2023-05-25 13:38:00,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +27: [2023-05-25 13:38:00,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_03-model_states.pt. +17: [2023-05-25 13:38:00,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... +31: [2023-05-25 13:38:00,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... +27: [2023-05-25 13:38:00,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... +14: [2023-05-25 13:38:00,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:00,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:00,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:00,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:00,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... +11: [2023-05-25 13:38:00,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +25: [2023-05-25 13:38:00,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:00,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +29: [2023-05-25 13:38:00,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +27: [2023-05-25 13:38:00,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +10: [2023-05-25 13:38:00,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +31: [2023-05-25 13:38:00,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +10: [2023-05-25 13:38:00,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +10: [2023-05-25 13:38:00,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +31: [2023-05-25 13:38:00,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_37-model_01-model_states.pt. +10: [2023-05-25 13:38:00,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +27: [2023-05-25 13:38:00,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +27: [2023-05-25 13:38:00,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +14: [2023-05-25 13:38:00,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... +13: [2023-05-25 13:38:00,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +26: [2023-05-25 13:38:00,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +14: [2023-05-25 13:38:00,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:00,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:00,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +25: [2023-05-25 13:38:00,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +25: [2023-05-25 13:38:00,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +14: [2023-05-25 13:38:00,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +25: [2023-05-25 13:38:00,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:00,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +24: [2023-05-25 13:38:00,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:00,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +14: [2023-05-25 13:38:00,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... +25: [2023-05-25 13:38:00,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +24: [2023-05-25 13:38:00,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... + 0: [2023-05-25 13:38:00,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +16: [2023-05-25 13:38:00,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 0: [2023-05-25 13:38:00,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 1: [2023-05-25 13:38:00,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... + 1: [2023-05-25 13:38:00,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... + 1: [2023-05-25 13:38:00,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... +16: [2023-05-25 13:38:00,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +12: [2023-05-25 13:38:00,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... + 8: [2023-05-25 13:38:00,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +24: [2023-05-25 13:38:00,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +13: [2023-05-25 13:38:00,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:00,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +26: [2023-05-25 13:38:00,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:00,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +24: [2023-05-25 13:38:00,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +19: [2023-05-25 13:38:00,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +19: [2023-05-25 13:38:00,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +26: [2023-05-25 13:38:00,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:00,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... +27: [2023-05-25 13:38:00,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:00,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... +12: [2023-05-25 13:38:00,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 0: [2023-05-25 13:38:00,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 0: [2023-05-25 13:38:00,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... +11: [2023-05-25 13:38:00,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:00,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:00,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +12: [2023-05-25 13:38:00,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:00,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +31: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... + 3: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... + 3: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... + 8: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... +31: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... +12: [2023-05-25 13:38:00,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:00,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:00,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:00,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +12: [2023-05-25 13:38:00,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:00,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +12: [2023-05-25 13:38:00,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +13: [2023-05-25 13:38:00,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:00,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +15: [2023-05-25 13:38:00,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +10: [2023-05-25 13:38:00,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:00,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +14: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:00,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... + 2: [2023-05-25 13:38:00,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +30: [2023-05-25 13:38:00,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... +23: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +14: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... + 2: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... +15: [2023-05-25 13:38:00,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... + 2: [2023-05-25 13:38:00,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... +19: [2023-05-25 13:38:00,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:00,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +30: [2023-05-25 13:38:00,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +23: [2023-05-25 13:38:00,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +19: [2023-05-25 13:38:00,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +20: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +30: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +15: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +20: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. + 8: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +19: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +19: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +17: [2023-05-25 13:38:00,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +14: [2023-05-25 13:38:00,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +30: [2023-05-25 13:38:00,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +12: [2023-05-25 13:38:00,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +12: [2023-05-25 13:38:00,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... + 6: [2023-05-25 13:38:00,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... + 6: [2023-05-25 13:38:00,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... + 6: [2023-05-25 13:38:00,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... + 6: [2023-05-25 13:38:00,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... +14: [2023-05-25 13:38:00,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +25: [2023-05-25 13:38:00,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +11: [2023-05-25 13:38:00,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:00,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:00,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:00,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:00,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +12: [2023-05-25 13:38:00,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +14: [2023-05-25 13:38:00,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +25: [2023-05-25 13:38:00,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +11: [2023-05-25 13:38:00,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:00,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +17: [2023-05-25 13:38:00,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +25: [2023-05-25 13:38:00,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +14: [2023-05-25 13:38:00,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +25: [2023-05-25 13:38:00,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +11: [2023-05-25 13:38:00,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +15: [2023-05-25 13:38:00,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt... +23: [2023-05-25 13:38:00,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:00,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +17: [2023-05-25 13:38:00,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +15: [2023-05-25 13:38:00,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +18: [2023-05-25 13:38:00,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. + 1: [2023-05-25 13:38:00,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +21: [2023-05-25 13:38:00,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +18: [2023-05-25 13:38:00,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. + 0: [2023-05-25 13:38:00,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +21: [2023-05-25 13:38:00,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +16: [2023-05-25 13:38:00,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:00,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,477] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +15: [2023-05-25 13:38:00,477] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +20: [2023-05-25 13:38:00,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +18: [2023-05-25 13:38:00,477] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +18: [2023-05-25 13:38:00,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +23: [2023-05-25 13:38:00,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +16: [2023-05-25 13:38:00,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:00,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:00,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +16: [2023-05-25 13:38:00,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +16: [2023-05-25 13:38:00,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. + 4: [2023-05-25 13:38:00,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +16: [2023-05-25 13:38:00,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +19: [2023-05-25 13:38:00,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:00,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. +15: [2023-05-25 13:38:00,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +22: [2023-05-25 13:38:00,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_03-model_states.pt. + 3: [2023-05-25 13:38:00,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +20: [2023-05-25 13:38:00,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +15: [2023-05-25 13:38:00,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... +31: [2023-05-25 13:38:00,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +19: [2023-05-25 13:38:00,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +16: [2023-05-25 13:38:00,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +20: [2023-05-25 13:38:00,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:00,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +12: [2023-05-25 13:38:00,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:00,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt... +12: [2023-05-25 13:38:00,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +31: [2023-05-25 13:38:00,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... +27: [2023-05-25 13:38:00,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +27: [2023-05-25 13:38:00,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +12: [2023-05-25 13:38:00,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:00,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:00,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... +27: [2023-05-25 13:38:00,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +27: [2023-05-25 13:38:00,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt... +15: [2023-05-25 13:38:00,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... +12: [2023-05-25 13:38:00,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt... +21: [2023-05-25 13:38:00,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. + 5: [2023-05-25 13:38:00,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +22: [2023-05-25 13:38:00,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +22: [2023-05-25 13:38:00,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +31: [2023-05-25 13:38:00,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_00-model_states.pt. +21: [2023-05-25 13:38:00,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_26-model_01-model_states.pt. +17: [2023-05-25 13:38:00,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +17: [2023-05-25 13:38:00,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:00,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +12: [2023-05-25 13:38:00,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +31: [2023-05-25 13:38:00,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt... + 1: [2023-05-25 13:38:00,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +18: [2023-05-25 13:38:00,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:00,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:00,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +18: [2023-05-25 13:38:00,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +18: [2023-05-25 13:38:00,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +16: [2023-05-25 13:38:00,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +18: [2023-05-25 13:38:00,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:00,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +23: [2023-05-25 13:38:00,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +16: [2023-05-25 13:38:00,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:00,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... +19: [2023-05-25 13:38:00,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... +12: [2023-05-25 13:38:00,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... +22: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt... +22: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:00,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:00,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +21: [2023-05-25 13:38:00,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:00,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:00,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +12: [2023-05-25 13:38:00,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +22: [2023-05-25 13:38:00,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:00,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +19: [2023-05-25 13:38:00,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:00,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:00,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:00,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:00,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +23: [2023-05-25 13:38:00,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +19: [2023-05-25 13:38:00,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... + 6: [2023-05-25 13:38:00,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +19: [2023-05-25 13:38:00,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:00,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:00,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +17: [2023-05-25 13:38:00,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +20: [2023-05-25 13:38:00,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +20: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +20: [2023-05-25 13:38:00,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +20: [2023-05-25 13:38:00,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +17: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +22: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:00,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +17: [2023-05-25 13:38:00,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +20: [2023-05-25 13:38:00,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_00-model_states.pt. +16: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +16: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +21: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +23: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +22: [2023-05-25 13:38:00,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +20: [2023-05-25 13:38:00,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:00,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:00,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +18: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +21: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +22: [2023-05-25 13:38:00,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +22: [2023-05-25 13:38:00,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +22: [2023-05-25 13:38:00,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:00,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:00,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +20: [2023-05-25 13:38:00,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +20: [2023-05-25 13:38:00,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +18: [2023-05-25 13:38:00,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +18: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +22: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... +22: [2023-05-25 13:38:00,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +22: [2023-05-25 13:38:00,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +21: [2023-05-25 13:38:00,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:00,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:00,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:00,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +21: [2023-05-25 13:38:00,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt... +18: [2023-05-25 13:38:00,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +18: [2023-05-25 13:38:00,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +18: [2023-05-25 13:38:00,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +18: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +20: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +20: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +18: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +16: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +23: [2023-05-25 13:38:00,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +23: [2023-05-25 13:38:00,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +17: [2023-05-25 13:38:00,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +17: [2023-05-25 13:38:00,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +20: [2023-05-25 13:38:00,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:00,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +20: [2023-05-25 13:38:00,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:00,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +21: [2023-05-25 13:38:00,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +21: [2023-05-25 13:38:00,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +16: [2023-05-25 13:38:00,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:00,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +24: [2023-05-25 13:38:00,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +24: [2023-05-25 13:38:00,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +17: [2023-05-25 13:38:00,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:00,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +17: [2023-05-25 13:38:00,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:00,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:00,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:00,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:00,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +24: [2023-05-25 13:38:00,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +21: [2023-05-25 13:38:00,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:00,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:00,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_02-model_states.pt. +28: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +28: [2023-05-25 13:38:00,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +24: [2023-05-25 13:38:00,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +24: [2023-05-25 13:38:00,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +24: [2023-05-25 13:38:00,639] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +24: [2023-05-25 13:38:00,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +13: [2023-05-25 13:38:00,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +13: [2023-05-25 13:38:00,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +22: [2023-05-25 13:38:00,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +28: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +12: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. +10: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. +10: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. +13: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. +26: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +26: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +31: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +31: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +13: [2023-05-25 13:38:00,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. +14: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. +13: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. +14: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. + 9: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. + 9: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. +29: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +29: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +26: [2023-05-25 13:38:00,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +26: [2023-05-25 13:38:00,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +30: [2023-05-25 13:38:00,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +30: [2023-05-25 13:38:00,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +12: [2023-05-25 13:38:00,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:00,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. +10: [2023-05-25 13:38:00,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +28: [2023-05-25 13:38:00,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +10: [2023-05-25 13:38:00,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:00,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. +28: [2023-05-25 13:38:00,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +11: [2023-05-25 13:38:00,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. +11: [2023-05-25 13:38:00,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +31: [2023-05-25 13:38:00,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +31: [2023-05-25 13:38:00,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. + 0: [2023-05-25 13:38:00,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. + 0: [2023-05-25 13:38:00,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. +31: [2023-05-25 13:38:00,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:00,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +26: [2023-05-25 13:38:00,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:00,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +31: [2023-05-25 13:38:00,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +14: [2023-05-25 13:38:00,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:00,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:00,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:00,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +26: [2023-05-25 13:38:00,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +13: [2023-05-25 13:38:00,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +25: [2023-05-25 13:38:00,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +25: [2023-05-25 13:38:00,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_02-model_states.pt. +30: [2023-05-25 13:38:00,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +30: [2023-05-25 13:38:00,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:00,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:00,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +30: [2023-05-25 13:38:00,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:00,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:00,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +30: [2023-05-25 13:38:00,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +28: [2023-05-25 13:38:00,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +29: [2023-05-25 13:38:00,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +15: [2023-05-25 13:38:00,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:00,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:00,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:00,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +24: [2023-05-25 13:38:00,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. + 0: [2023-05-25 13:38:00,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +24: [2023-05-25 13:38:00,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. + 1: [2023-05-25 13:38:00,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. +15: [2023-05-25 13:38:00,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. +31: [2023-05-25 13:38:00,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:00,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:00,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +11: [2023-05-25 13:38:00,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +11: [2023-05-25 13:38:00,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +27: [2023-05-25 13:38:00,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:00,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:00,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:00,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:00,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:00,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:00,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:00,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:00,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:00,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +29: [2023-05-25 13:38:00,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:00,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:00,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:00,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:00,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:00,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:00,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +28: [2023-05-25 13:38:00,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +26: [2023-05-25 13:38:00,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:00,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:00,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:00,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +31: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:00,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. + 1: [2023-05-25 13:38:00,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +29: [2023-05-25 13:38:00,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:00,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +31: [2023-05-25 13:38:00,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:00,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:00,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:00,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +24: [2023-05-25 13:38:00,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:00,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:00,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:00,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +28: [2023-05-25 13:38:00,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +24: [2023-05-25 13:38:00,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:00,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:00,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +18: [2023-05-25 13:38:00,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:00,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:00,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:00,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:00,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:00,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:00,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:00,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +23: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +29: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +18: [2023-05-25 13:38:00,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +18: [2023-05-25 13:38:00,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +29: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. + 1: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. +18: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +18: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... + 1: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. +29: [2023-05-25 13:38:00,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +29: [2023-05-25 13:38:00,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +31: [2023-05-25 13:38:00,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +31: [2023-05-25 13:38:00,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +25: [2023-05-25 13:38:00,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +25: [2023-05-25 13:38:00,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +15: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +15: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +27: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +27: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +22: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +27: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +25: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +27: [2023-05-25 13:38:00,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_03-model_states.pt. +25: [2023-05-25 13:38:00,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_38-model_01-model_states.pt. +26: [2023-05-25 13:38:00,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +22: [2023-05-25 13:38:00,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:00,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:00,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +22: [2023-05-25 13:38:00,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +26: [2023-05-25 13:38:00,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:00,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +31: [2023-05-25 13:38:00,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:00,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:00,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +31: [2023-05-25 13:38:00,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:00,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:00,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +31: [2023-05-25 13:38:00,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +26: [2023-05-25 13:38:00,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:00,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +31: [2023-05-25 13:38:00,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +24: [2023-05-25 13:38:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:00,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... + 1: [2023-05-25 13:38:00,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +21: [2023-05-25 13:38:00,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. + 1: [2023-05-25 13:38:00,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +21: [2023-05-25 13:38:00,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:00,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. +24: [2023-05-25 13:38:00,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +24: [2023-05-25 13:38:00,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +28: [2023-05-25 13:38:00,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +19: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +19: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +26: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +26: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +19: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +19: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +24: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +24: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +24: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +21: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +21: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +28: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +28: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +21: [2023-05-25 13:38:00,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +16: [2023-05-25 13:38:00,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +16: [2023-05-25 13:38:00,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:00,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +19: [2023-05-25 13:38:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +16: [2023-05-25 13:38:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +15: [2023-05-25 13:38:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +21: [2023-05-25 13:38:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +21: [2023-05-25 13:38:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +15: [2023-05-25 13:38:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +16: [2023-05-25 13:38:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +19: [2023-05-25 13:38:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +30: [2023-05-25 13:38:00,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +19: [2023-05-25 13:38:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +26: [2023-05-25 13:38:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +31: [2023-05-25 13:38:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +17: [2023-05-25 13:38:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +31: [2023-05-25 13:38:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +17: [2023-05-25 13:38:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +16: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +16: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +30: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +16: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +25: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:00,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +17: [2023-05-25 13:38:00,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +17: [2023-05-25 13:38:00,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +30: [2023-05-25 13:38:00,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +29: [2023-05-25 13:38:00,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +17: [2023-05-25 13:38:00,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +17: [2023-05-25 13:38:00,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +25: [2023-05-25 13:38:00,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:00,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:00,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +27: [2023-05-25 13:38:00,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:00,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:00,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:00,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:00,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:00,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +29: [2023-05-25 13:38:00,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:00,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:00,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:00,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:00,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:00,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. +29: [2023-05-25 13:38:00,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:00,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt... +24: [2023-05-25 13:38:00,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:00,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:00,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +27: [2023-05-25 13:38:00,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. +25: [2023-05-25 13:38:00,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +27: [2023-05-25 13:38:00,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:00,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:00,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +25: [2023-05-25 13:38:00,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +25: [2023-05-25 13:38:00,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +18: [2023-05-25 13:38:00,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:00,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +24: [2023-05-25 13:38:00,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +22: [2023-05-25 13:38:00,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +18: [2023-05-25 13:38:00,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +30: [2023-05-25 13:38:00,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +16: [2023-05-25 13:38:00,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:00,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +18: [2023-05-25 13:38:00,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +29: [2023-05-25 13:38:00,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +18: [2023-05-25 13:38:00,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +20: [2023-05-25 13:38:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +30: [2023-05-25 13:38:00,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +20: [2023-05-25 13:38:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. +20: [2023-05-25 13:38:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:00,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +29: [2023-05-25 13:38:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +23: [2023-05-25 13:38:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +27: [2023-05-25 13:38:00,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +23: [2023-05-25 13:38:00,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +27: [2023-05-25 13:38:00,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... + 6: [2023-05-25 13:38:00,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. + 6: [2023-05-25 13:38:00,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. +26: [2023-05-25 13:38:00,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +20: [2023-05-25 13:38:00,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... + 0: [2023-05-25 13:38:00,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. +22: [2023-05-25 13:38:00,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. +24: [2023-05-25 13:38:00,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +31: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. + 5: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. +30: [2023-05-25 13:38:00,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +11: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +20: [2023-05-25 13:38:00,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +11: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +20: [2023-05-25 13:38:00,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt... +11: [2023-05-25 13:38:00,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +27: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +20: [2023-05-25 13:38:00,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:00,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +22: [2023-05-25 13:38:00,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:00,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +30: [2023-05-25 13:38:00,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +29: [2023-05-25 13:38:00,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +11: [2023-05-25 13:38:00,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:00,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +11: [2023-05-25 13:38:00,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... +31: [2023-05-25 13:38:00,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +11: [2023-05-25 13:38:00,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... +14: [2023-05-25 13:38:00,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +14: [2023-05-25 13:38:00,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +11: [2023-05-25 13:38:00,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +26: [2023-05-25 13:38:00,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +11: [2023-05-25 13:38:00,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +26: [2023-05-25 13:38:00,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +26: [2023-05-25 13:38:00,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +21: [2023-05-25 13:38:00,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +28: [2023-05-25 13:38:00,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:00,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:00,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +10: [2023-05-25 13:38:00,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +28: [2023-05-25 13:38:00,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:00,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt... +25: [2023-05-25 13:38:00,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +10: [2023-05-25 13:38:00,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +21: [2023-05-25 13:38:00,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. + 4: [2023-05-25 13:38:00,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:00,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:00,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +28: [2023-05-25 13:38:00,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +28: [2023-05-25 13:38:00,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +30: [2023-05-25 13:38:00,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +23: [2023-05-25 13:38:00,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +30: [2023-05-25 13:38:00,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:00,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +26: [2023-05-25 13:38:00,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +25: [2023-05-25 13:38:00,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +18: [2023-05-25 13:38:00,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +17: [2023-05-25 13:38:00,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. + 1: [2023-05-25 13:38:00,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. +30: [2023-05-25 13:38:00,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +22: [2023-05-25 13:38:00,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:00,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +19: [2023-05-25 13:38:00,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +24: [2023-05-25 13:38:00,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +23: [2023-05-25 13:38:00,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:00,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:00,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +21: [2023-05-25 13:38:00,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:00,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +18: [2023-05-25 13:38:00,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +29: [2023-05-25 13:38:00,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +17: [2023-05-25 13:38:00,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +31: [2023-05-25 13:38:00,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +17: [2023-05-25 13:38:00,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. + 5: [2023-05-25 13:38:00,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +27: [2023-05-25 13:38:00,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +22: [2023-05-25 13:38:00,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +19: [2023-05-25 13:38:00,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +22: [2023-05-25 13:38:00,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +30: [2023-05-25 13:38:00,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +12: [2023-05-25 13:38:00,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:00,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:00,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:00,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +12: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +12: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. +14: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. + 3: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. +12: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +30: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +12: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. +21: [2023-05-25 13:38:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +31: [2023-05-25 13:38:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +10: [2023-05-25 13:38:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +16: [2023-05-25 13:38:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +10: [2023-05-25 13:38:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +31: [2023-05-25 13:38:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +12: [2023-05-25 13:38:00,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +16: [2023-05-25 13:38:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:00,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +25: [2023-05-25 13:38:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +15: [2023-05-25 13:38:00,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:00,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +31: [2023-05-25 13:38:00,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +15: [2023-05-25 13:38:00,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:00,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:00,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:00,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +27: [2023-05-25 13:38:00,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +21: [2023-05-25 13:38:00,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +31: [2023-05-25 13:38:00,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +30: [2023-05-25 13:38:00,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +25: [2023-05-25 13:38:00,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +25: [2023-05-25 13:38:00,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +25: [2023-05-25 13:38:00,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +29: [2023-05-25 13:38:00,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +10: [2023-05-25 13:38:00,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:00,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:00,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:00,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:00,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:00,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +15: [2023-05-25 13:38:00,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... +25: [2023-05-25 13:38:00,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +15: [2023-05-25 13:38:00,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +15: [2023-05-25 13:38:00,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +29: [2023-05-25 13:38:00,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +10: [2023-05-25 13:38:00,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:00,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... +10: [2023-05-25 13:38:00,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +24: [2023-05-25 13:38:00,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +19: [2023-05-25 13:38:00,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +10: [2023-05-25 13:38:00,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +10: [2023-05-25 13:38:00,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +24: [2023-05-25 13:38:00,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +25: [2023-05-25 13:38:00,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. +29: [2023-05-25 13:38:00,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +21: [2023-05-25 13:38:00,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:00,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:00,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:00,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +19: [2023-05-25 13:38:00,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +19: [2023-05-25 13:38:00,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +11: [2023-05-25 13:38:00,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +17: [2023-05-25 13:38:00,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +27: [2023-05-25 13:38:00,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. + 3: [2023-05-25 13:38:00,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. + 3: [2023-05-25 13:38:00,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. + 3: [2023-05-25 13:38:00,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. + 9: [2023-05-25 13:38:00,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +29: [2023-05-25 13:38:00,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:00,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +19: [2023-05-25 13:38:00,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. +11: [2023-05-25 13:38:00,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_03-model_states.pt. +29: [2023-05-25 13:38:00,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +16: [2023-05-25 13:38:00,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +16: [2023-05-25 13:38:00,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. + 5: [2023-05-25 13:38:00,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. + 9: [2023-05-25 13:38:00,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +16: [2023-05-25 13:38:00,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... +27: [2023-05-25 13:38:00,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +17: [2023-05-25 13:38:00,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +27: [2023-05-25 13:38:00,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:00,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +27: [2023-05-25 13:38:00,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_00-model_states.pt. +16: [2023-05-25 13:38:00,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... +16: [2023-05-25 13:38:00,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:00,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +20: [2023-05-25 13:38:00,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_03-model_states.pt. +29: [2023-05-25 13:38:00,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +29: [2023-05-25 13:38:00,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +20: [2023-05-25 13:38:00,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +29: [2023-05-25 13:38:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... + 3: [2023-05-25 13:38:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. +22: [2023-05-25 13:38:00,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:00,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:00,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +27: [2023-05-25 13:38:00,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt... +27: [2023-05-25 13:38:00,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... +27: [2023-05-25 13:38:00,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. + 0: [2023-05-25 13:38:00,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +19: [2023-05-25 13:38:00,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +18: [2023-05-25 13:38:00,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:00,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +19: [2023-05-25 13:38:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... +11: [2023-05-25 13:38:00,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +18: [2023-05-25 13:38:00,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:00,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... +23: [2023-05-25 13:38:00,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... +18: [2023-05-25 13:38:00,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... +18: [2023-05-25 13:38:00,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +12: [2023-05-25 13:38:00,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +22: [2023-05-25 13:38:00,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +10: [2023-05-25 13:38:00,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +11: [2023-05-25 13:38:00,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +21: [2023-05-25 13:38:00,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:00,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... +10: [2023-05-25 13:38:00,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +17: [2023-05-25 13:38:00,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +10: [2023-05-25 13:38:00,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +10: [2023-05-25 13:38:00,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... +21: [2023-05-25 13:38:00,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... + 0: [2023-05-25 13:38:00,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +17: [2023-05-25 13:38:00,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +12: [2023-05-25 13:38:00,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... +15: [2023-05-25 13:38:00,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:00,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +15: [2023-05-25 13:38:00,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:00,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:00,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... +12: [2023-05-25 13:38:00,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. +12: [2023-05-25 13:38:00,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +20: [2023-05-25 13:38:00,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +20: [2023-05-25 13:38:00,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +12: [2023-05-25 13:38:00,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +17: [2023-05-25 13:38:00,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:00,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +17: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... + 8: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +20: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... + 8: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... + 4: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. + 4: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:00,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +20: [2023-05-25 13:38:00,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... +13: [2023-05-25 13:38:00,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:00,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:00,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:00,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:00,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... +13: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +13: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +13: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:00,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +13: [2023-05-25 13:38:00,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +13: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +13: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +13: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:00,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:00,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... +14: [2023-05-25 13:38:00,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +15: [2023-05-25 13:38:00,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:00,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +14: [2023-05-25 13:38:00,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:00,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... +19: [2023-05-25 13:38:00,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. + 4: [2023-05-25 13:38:00,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... +12: [2023-05-25 13:38:00,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +12: [2023-05-25 13:38:00,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. + 4: [2023-05-25 13:38:00,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... + 6: [2023-05-25 13:38:00,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. +14: [2023-05-25 13:38:00,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +15: [2023-05-25 13:38:00,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:00,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. + 6: [2023-05-25 13:38:00,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. +11: [2023-05-25 13:38:00,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +11: [2023-05-25 13:38:00,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. + 9: [2023-05-25 13:38:00,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +19: [2023-05-25 13:38:00,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt... +10: [2023-05-25 13:38:00,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +19: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... +19: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... +10: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +10: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +15: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +15: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. + 0: [2023-05-25 13:38:00,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. + 7: [2023-05-25 13:38:00,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_02-model_states.pt. +20: [2023-05-25 13:38:00,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +13: [2023-05-25 13:38:00,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:00,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +20: [2023-05-25 13:38:00,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... + 0: [2023-05-25 13:38:00,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +11: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... +14: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +11: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +14: [2023-05-25 13:38:00,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +10: [2023-05-25 13:38:00,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:00,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +15: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +15: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_04-model_01-model_states.pt. +20: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +20: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +13: [2023-05-25 13:38:00,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +14: [2023-05-25 13:38:00,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 2: [2023-05-25 13:38:00,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 2: [2023-05-25 13:38:00,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... +12: [2023-05-25 13:38:00,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:00,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +19: [2023-05-25 13:38:00,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. +19: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. +21: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +21: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. + 1: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +14: [2023-05-25 13:38:00,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:00,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:00,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... + 3: [2023-05-25 13:38:00,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +12: [2023-05-25 13:38:00,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt... + 3: [2023-05-25 13:38:00,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 1: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... + 1: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 1: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 4: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +18: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. + 4: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +18: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +13: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... + 3: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... + 3: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +13: [2023-05-25 13:38:00,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... +14: [2023-05-25 13:38:00,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:00,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt... +13: [2023-05-25 13:38:00,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... +11: [2023-05-25 13:38:00,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +19: [2023-05-25 13:38:00,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:00,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:00,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:00,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +13: [2023-05-25 13:38:00,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... + 8: [2023-05-25 13:38:00,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +23: [2023-05-25 13:38:00,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. +23: [2023-05-25 13:38:00,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +14: [2023-05-25 13:38:00,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +19: [2023-05-25 13:38:00,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +14: [2023-05-25 13:38:00,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:00,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... +12: [2023-05-25 13:38:00,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_16-model_03-model_states.pt. +10: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +17: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. +14: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... +17: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 2: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +18: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... +14: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:00,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... +10: [2023-05-25 13:38:00,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +18: [2023-05-25 13:38:00,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +15: [2023-05-25 13:38:00,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... +11: [2023-05-25 13:38:00,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:00,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... +15: [2023-05-25 13:38:00,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... +11: [2023-05-25 13:38:00,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... +16: [2023-05-25 13:38:00,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. +16: [2023-05-25 13:38:00,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. +15: [2023-05-25 13:38:00,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... +15: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... + 9: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +22: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... +12: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +22: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +23: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:00,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,866] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,866] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +12: [2023-05-25 13:38:00,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt... +17: [2023-05-25 13:38:00,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +17: [2023-05-25 13:38:00,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +16: [2023-05-25 13:38:00,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:00,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 7: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +19: [2023-05-25 13:38:00,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:00,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:00,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +19: [2023-05-25 13:38:00,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +21: [2023-05-25 13:38:00,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:00,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +19: [2023-05-25 13:38:00,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_00-model_states.pt. +18: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt... +21: [2023-05-25 13:38:00,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +21: [2023-05-25 13:38:00,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +19: [2023-05-25 13:38:00,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +18: [2023-05-25 13:38:00,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +18: [2023-05-25 13:38:00,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +18: [2023-05-25 13:38:00,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... + 3: [2023-05-25 13:38:00,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +12: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +23: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +12: [2023-05-25 13:38:00,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... +23: [2023-05-25 13:38:00,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +12: [2023-05-25 13:38:00,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_00-model_states.pt. +17: [2023-05-25 13:38:00,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:00,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. +20: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_27-model_01-model_states.pt. +23: [2023-05-25 13:38:00,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +23: [2023-05-25 13:38:00,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +12: [2023-05-25 13:38:00,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt... +16: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +17: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +16: [2023-05-25 13:38:00,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +16: [2023-05-25 13:38:00,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +22: [2023-05-25 13:38:00,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:00,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:00,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +18: [2023-05-25 13:38:00,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +22: [2023-05-25 13:38:00,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +22: [2023-05-25 13:38:00,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +20: [2023-05-25 13:38:00,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +20: [2023-05-25 13:38:00,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt... +18: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:00,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +22: [2023-05-25 13:38:00,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +22: [2023-05-25 13:38:00,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +21: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +21: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +20: [2023-05-25 13:38:00,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:00,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +23: [2023-05-25 13:38:00,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +20: [2023-05-25 13:38:00,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt... +22: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:00,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:00,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:00,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:00,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:00,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +28: [2023-05-25 13:38:00,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +28: [2023-05-25 13:38:00,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +28: [2023-05-25 13:38:00,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:00,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +19: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +19: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +31: [2023-05-25 13:38:01,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +31: [2023-05-25 13:38:01,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +31: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +31: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +25: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +25: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +17: [2023-05-25 13:38:01,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +17: [2023-05-25 13:38:01,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +19: [2023-05-25 13:38:01,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +19: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +24: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +24: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +31: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +10: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +25: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +25: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +10: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +31: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +10: [2023-05-25 13:38:01,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +25: [2023-05-25 13:38:01,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +25: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +16: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +31: [2023-05-25 13:38:01,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +27: [2023-05-25 13:38:01,022] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +27: [2023-05-25 13:38:01,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +25: [2023-05-25 13:38:01,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +25: [2023-05-25 13:38:01,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +16: [2023-05-25 13:38:01,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +31: [2023-05-25 13:38:01,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +17: [2023-05-25 13:38:01,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +28: [2023-05-25 13:38:01,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +28: [2023-05-25 13:38:01,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +17: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +29: [2023-05-25 13:38:01,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +24: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +29: [2023-05-25 13:38:01,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +24: [2023-05-25 13:38:01,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:01,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +24: [2023-05-25 13:38:01,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:01,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +30: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +26: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +26: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_02-model_states.pt. +25: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +16: [2023-05-25 13:38:01,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +25: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +25: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +15: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +25: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +15: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +28: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +11: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +28: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +11: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +16: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +28: [2023-05-25 13:38:01,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:01,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +11: [2023-05-25 13:38:01,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +29: [2023-05-25 13:38:01,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +11: [2023-05-25 13:38:01,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. + 2: [2023-05-25 13:38:01,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. +26: [2023-05-25 13:38:01,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +24: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +27: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. +26: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +12: [2023-05-25 13:38:01,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. +11: [2023-05-25 13:38:01,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. +12: [2023-05-25 13:38:01,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +27: [2023-05-25 13:38:01,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:01,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +24: [2023-05-25 13:38:01,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:01,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +12: [2023-05-25 13:38:01,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +28: [2023-05-25 13:38:01,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +30: [2023-05-25 13:38:01,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:01,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +26: [2023-05-25 13:38:01,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:01,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +26: [2023-05-25 13:38:01,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:01,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +20: [2023-05-25 13:38:01,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +24: [2023-05-25 13:38:01,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +24: [2023-05-25 13:38:01,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. + 2: [2023-05-25 13:38:01,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +29: [2023-05-25 13:38:01,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +20: [2023-05-25 13:38:01,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_02-model_states.pt. +10: [2023-05-25 13:38:01,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +26: [2023-05-25 13:38:01,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +26: [2023-05-25 13:38:01,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. +30: [2023-05-25 13:38:01,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +30: [2023-05-25 13:38:01,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:01,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:01,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. +30: [2023-05-25 13:38:01,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +29: [2023-05-25 13:38:01,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +27: [2023-05-25 13:38:01,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +27: [2023-05-25 13:38:01,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +31: [2023-05-25 13:38:01,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +28: [2023-05-25 13:38:01,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +28: [2023-05-25 13:38:01,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +28: [2023-05-25 13:38:01,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +28: [2023-05-25 13:38:01,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:01,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +10: [2023-05-25 13:38:01,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +30: [2023-05-25 13:38:01,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +20: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +30: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +10: [2023-05-25 13:38:01,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +28: [2023-05-25 13:38:01,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:01,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +15: [2023-05-25 13:38:01,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. +15: [2023-05-25 13:38:01,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. +28: [2023-05-25 13:38:01,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +28: [2023-05-25 13:38:01,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +28: [2023-05-25 13:38:01,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:01,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:01,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +27: [2023-05-25 13:38:01,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +27: [2023-05-25 13:38:01,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_01-model_states.pt. +24: [2023-05-25 13:38:01,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +10: [2023-05-25 13:38:01,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. +28: [2023-05-25 13:38:01,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. +27: [2023-05-25 13:38:01,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:01,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +30: [2023-05-25 13:38:01,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. +29: [2023-05-25 13:38:01,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +27: [2023-05-25 13:38:01,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +10: [2023-05-25 13:38:01,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +20: [2023-05-25 13:38:01,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +30: [2023-05-25 13:38:01,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +23: [2023-05-25 13:38:01,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:01,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:01,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +23: [2023-05-25 13:38:01,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +27: [2023-05-25 13:38:01,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +23: [2023-05-25 13:38:01,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +28: [2023-05-25 13:38:01,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +27: [2023-05-25 13:38:01,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +23: [2023-05-25 13:38:01,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. +31: [2023-05-25 13:38:01,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +26: [2023-05-25 13:38:01,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +26: [2023-05-25 13:38:01,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_39-model_03-model_states.pt. +28: [2023-05-25 13:38:01,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. +23: [2023-05-25 13:38:01,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... +23: [2023-05-25 13:38:01,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... +31: [2023-05-25 13:38:01,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. +12: [2023-05-25 13:38:01,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +25: [2023-05-25 13:38:01,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +14: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +25: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +27: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +25: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. +25: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +13: [2023-05-25 13:38:01,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. +13: [2023-05-25 13:38:01,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +14: [2023-05-25 13:38:01,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +25: [2023-05-25 13:38:01,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:01,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +14: [2023-05-25 13:38:01,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +27: [2023-05-25 13:38:01,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +25: [2023-05-25 13:38:01,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +25: [2023-05-25 13:38:01,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... +25: [2023-05-25 13:38:01,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... +25: [2023-05-25 13:38:01,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +25: [2023-05-25 13:38:01,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +25: [2023-05-25 13:38:01,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... +25: [2023-05-25 13:38:01,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... +15: [2023-05-25 13:38:01,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +28: [2023-05-25 13:38:01,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. +12: [2023-05-25 13:38:01,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. +15: [2023-05-25 13:38:01,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +28: [2023-05-25 13:38:01,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. +10: [2023-05-25 13:38:01,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +27: [2023-05-25 13:38:01,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +11: [2023-05-25 13:38:01,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. +27: [2023-05-25 13:38:01,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. + 5: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. +27: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +31: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +11: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +24: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. +31: [2023-05-25 13:38:01,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:01,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:01,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_02-model_states.pt. +10: [2023-05-25 13:38:01,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. + 0: [2023-05-25 13:38:01,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. +10: [2023-05-25 13:38:01,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. +11: [2023-05-25 13:38:01,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +29: [2023-05-25 13:38:01,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. +29: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. +27: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... +15: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +24: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... +30: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +27: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +27: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... +13: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +24: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... +24: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... +24: [2023-05-25 13:38:01,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +10: [2023-05-25 13:38:01,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. +10: [2023-05-25 13:38:01,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. +24: [2023-05-25 13:38:01,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... +12: [2023-05-25 13:38:01,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +11: [2023-05-25 13:38:01,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +30: [2023-05-25 13:38:01,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... +24: [2023-05-25 13:38:01,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +13: [2023-05-25 13:38:01,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +13: [2023-05-25 13:38:01,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +26: [2023-05-25 13:38:01,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. +23: [2023-05-25 13:38:01,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +23: [2023-05-25 13:38:01,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +12: [2023-05-25 13:38:01,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +19: [2023-05-25 13:38:01,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +17: [2023-05-25 13:38:01,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +17: [2023-05-25 13:38:01,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +28: [2023-05-25 13:38:01,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:01,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +31: [2023-05-25 13:38:01,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... + 8: [2023-05-25 13:38:01,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +31: [2023-05-25 13:38:01,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +19: [2023-05-25 13:38:01,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +31: [2023-05-25 13:38:01,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +31: [2023-05-25 13:38:01,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +28: [2023-05-25 13:38:01,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +28: [2023-05-25 13:38:01,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +15: [2023-05-25 13:38:01,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +30: [2023-05-25 13:38:01,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +20: [2023-05-25 13:38:01,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +28: [2023-05-25 13:38:01,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... +20: [2023-05-25 13:38:01,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +11: [2023-05-25 13:38:01,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +22: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +26: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:01,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +22: [2023-05-25 13:38:01,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +22: [2023-05-25 13:38:01,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +24: [2023-05-25 13:38:01,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:01,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +10: [2023-05-25 13:38:01,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +22: [2023-05-25 13:38:01,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +10: [2023-05-25 13:38:01,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +22: [2023-05-25 13:38:01,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:01,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +22: [2023-05-25 13:38:01,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... +10: [2023-05-25 13:38:01,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +26: [2023-05-25 13:38:01,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:01,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +18: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +18: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +26: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... +26: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... +13: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. + 5: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +15: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +24: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +26: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +26: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... +13: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. +26: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +26: [2023-05-25 13:38:01,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +12: [2023-05-25 13:38:01,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +21: [2023-05-25 13:38:01,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +21: [2023-05-25 13:38:01,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +30: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... +30: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +19: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +12: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. +31: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +23: [2023-05-25 13:38:01,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:01,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:01,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:01,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +30: [2023-05-25 13:38:01,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +27: [2023-05-25 13:38:01,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +16: [2023-05-25 13:38:01,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. +16: [2023-05-25 13:38:01,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +23: [2023-05-25 13:38:01,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:01,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:01,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +28: [2023-05-25 13:38:01,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:01,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +17: [2023-05-25 13:38:01,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:01,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:01,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +30: [2023-05-25 13:38:01,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +25: [2023-05-25 13:38:01,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +23: [2023-05-25 13:38:01,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +12: [2023-05-25 13:38:01,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. +31: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +16: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +12: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +16: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +20: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +28: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +21: [2023-05-25 13:38:01,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +31: [2023-05-25 13:38:01,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +30: [2023-05-25 13:38:01,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +11: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +11: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +16: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +16: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... +21: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +16: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +15: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +15: [2023-05-25 13:38:01,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +15: [2023-05-25 13:38:01,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +22: [2023-05-25 13:38:01,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +20: [2023-05-25 13:38:01,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... +22: [2023-05-25 13:38:01,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +30: [2023-05-25 13:38:01,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... +18: [2023-05-25 13:38:01,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... +15: [2023-05-25 13:38:01,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... +18: [2023-05-25 13:38:01,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +14: [2023-05-25 13:38:01,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +21: [2023-05-25 13:38:01,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... +14: [2023-05-25 13:38:01,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +27: [2023-05-25 13:38:01,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +14: [2023-05-25 13:38:01,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +18: [2023-05-25 13:38:01,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +12: [2023-05-25 13:38:01,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +18: [2023-05-25 13:38:01,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +30: [2023-05-25 13:38:01,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +21: [2023-05-25 13:38:01,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +14: [2023-05-25 13:38:01,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. +11: [2023-05-25 13:38:01,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. +14: [2023-05-25 13:38:01,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. +21: [2023-05-25 13:38:01,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +15: [2023-05-25 13:38:01,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. +11: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. +27: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +23: [2023-05-25 13:38:01,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +27: [2023-05-25 13:38:01,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:01,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:01,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:01,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +15: [2023-05-25 13:38:01,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... +16: [2023-05-25 13:38:01,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +14: [2023-05-25 13:38:01,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. +12: [2023-05-25 13:38:01,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_01-model_states.pt. +31: [2023-05-25 13:38:01,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +23: [2023-05-25 13:38:01,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +25: [2023-05-25 13:38:01,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +13: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. +10: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_02-model_states.pt. +15: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +27: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +12: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +12: [2023-05-25 13:38:01,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +16: [2023-05-25 13:38:01,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:01,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +24: [2023-05-25 13:38:01,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +18: [2023-05-25 13:38:01,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:01,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:01,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +10: [2023-05-25 13:38:01,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... +27: [2023-05-25 13:38:01,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... +19: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +11: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +19: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +24: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +27: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:01,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +18: [2023-05-25 13:38:01,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:01,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... +19: [2023-05-25 13:38:01,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:01,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +10: [2023-05-25 13:38:01,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +18: [2023-05-25 13:38:01,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +11: [2023-05-25 13:38:01,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +18: [2023-05-25 13:38:01,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... +14: [2023-05-25 13:38:01,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +30: [2023-05-25 13:38:01,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +10: [2023-05-25 13:38:01,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +19: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... +19: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. +27: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. + 2: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. +19: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. +19: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +15: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... +26: [2023-05-25 13:38:01,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +27: [2023-05-25 13:38:01,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +14: [2023-05-25 13:38:01,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +18: [2023-05-25 13:38:01,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +10: [2023-05-25 13:38:01,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +13: [2023-05-25 13:38:01,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +20: [2023-05-25 13:38:01,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:01,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:01,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:01,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:01,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +14: [2023-05-25 13:38:01,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +27: [2023-05-25 13:38:01,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... +14: [2023-05-25 13:38:01,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +26: [2023-05-25 13:38:01,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... + 4: [2023-05-25 13:38:01,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. +20: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +13: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +30: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +20: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... +20: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +10: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +22: [2023-05-25 13:38:01,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:01,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +19: [2023-05-25 13:38:01,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. +10: [2023-05-25 13:38:01,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +31: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +10: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +14: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. +27: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +26: [2023-05-25 13:38:01,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +19: [2023-05-25 13:38:01,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... +15: [2023-05-25 13:38:01,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +22: [2023-05-25 13:38:01,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +13: [2023-05-25 13:38:01,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... +10: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... +24: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +10: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +17: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +24: [2023-05-25 13:38:01,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +22: [2023-05-25 13:38:01,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... + 3: [2023-05-25 13:38:01,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_05-model_03-model_states.pt. +23: [2023-05-25 13:38:01,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... +17: [2023-05-25 13:38:01,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +15: [2023-05-25 13:38:01,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... +17: [2023-05-25 13:38:01,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +26: [2023-05-25 13:38:01,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +14: [2023-05-25 13:38:01,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... +13: [2023-05-25 13:38:01,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +17: [2023-05-25 13:38:01,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... +17: [2023-05-25 13:38:01,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt... +17: [2023-05-25 13:38:01,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:01,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... +23: [2023-05-25 13:38:01,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +17: [2023-05-25 13:38:01,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... +17: [2023-05-25 13:38:01,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... + 4: [2023-05-25 13:38:01,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +29: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +13: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +29: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +13: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +20: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +29: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +20: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +29: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +29: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +21: [2023-05-25 13:38:01,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +23: [2023-05-25 13:38:01,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... +16: [2023-05-25 13:38:01,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... +19: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... + 9: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +22: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +29: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +13: [2023-05-25 13:38:01,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +20: [2023-05-25 13:38:01,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... +20: [2023-05-25 13:38:01,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +29: [2023-05-25 13:38:01,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +29: [2023-05-25 13:38:01,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt... +23: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... +29: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +21: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... +23: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +29: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +29: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt... +29: [2023-05-25 13:38:01,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt... +29: [2023-05-25 13:38:01,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt... +22: [2023-05-25 13:38:01,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +22: [2023-05-25 13:38:01,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... +22: [2023-05-25 13:38:01,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:01,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +16: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... + 8: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +18: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +13: [2023-05-25 13:38:01,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,157] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +26: [2023-05-25 13:38:01,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,157] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +13: [2023-05-25 13:38:01,157] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +12: [2023-05-25 13:38:01,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +13: [2023-05-25 13:38:01,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +21: [2023-05-25 13:38:01,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +21: [2023-05-25 13:38:01,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +18: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +11: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +18: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... +12: [2023-05-25 13:38:01,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... +12: [2023-05-25 13:38:01,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... +16: [2023-05-25 13:38:01,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... +11: [2023-05-25 13:38:01,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... +12: [2023-05-25 13:38:01,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... +18: [2023-05-25 13:38:01,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... + 3: [2023-05-25 13:38:01,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... +12: [2023-05-25 13:38:01,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt... +12: [2023-05-25 13:38:01,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +21: [2023-05-25 13:38:01,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:01,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:01,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +19: [2023-05-25 13:38:01,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:01,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +12: [2023-05-25 13:38:01,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +19: [2023-05-25 13:38:01,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +26: [2023-05-25 13:38:01,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +12: [2023-05-25 13:38:01,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +12: [2023-05-25 13:38:01,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +12: [2023-05-25 13:38:01,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_17-model_03-model_states.pt. +23: [2023-05-25 13:38:01,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +19: [2023-05-25 13:38:01,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +14: [2023-05-25 13:38:01,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +14: [2023-05-25 13:38:01,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +21: [2023-05-25 13:38:01,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +19: [2023-05-25 13:38:01,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +16: [2023-05-25 13:38:01,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:01,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +11: [2023-05-25 13:38:01,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +18: [2023-05-25 13:38:01,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +14: [2023-05-25 13:38:01,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... +14: [2023-05-25 13:38:01,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... +14: [2023-05-25 13:38:01,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +18: [2023-05-25 13:38:01,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:01,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:01,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +15: [2023-05-25 13:38:01,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +18: [2023-05-25 13:38:01,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +18: [2023-05-25 13:38:01,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +11: [2023-05-25 13:38:01,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... +11: [2023-05-25 13:38:01,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... +18: [2023-05-25 13:38:01,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +15: [2023-05-25 13:38:01,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +20: [2023-05-25 13:38:01,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +13: [2023-05-25 13:38:01,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:01,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... +14: [2023-05-25 13:38:01,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +15: [2023-05-25 13:38:01,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +15: [2023-05-25 13:38:01,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... +14: [2023-05-25 13:38:01,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... +18: [2023-05-25 13:38:01,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:01,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. + 2: [2023-05-25 13:38:01,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +21: [2023-05-25 13:38:01,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +15: [2023-05-25 13:38:01,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... +16: [2023-05-25 13:38:01,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... +16: [2023-05-25 13:38:01,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt... +19: [2023-05-25 13:38:01,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +17: [2023-05-25 13:38:01,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +12: [2023-05-25 13:38:01,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +17: [2023-05-25 13:38:01,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +17: [2023-05-25 13:38:01,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +17: [2023-05-25 13:38:01,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +14: [2023-05-25 13:38:01,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +15: [2023-05-25 13:38:01,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,183] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +19: [2023-05-25 13:38:01,183] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... + 8: [2023-05-25 13:38:01,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt... +12: [2023-05-25 13:38:01,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt... +19: [2023-05-25 13:38:01,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +19: [2023-05-25 13:38:01,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... +13: [2023-05-25 13:38:01,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +18: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +20: [2023-05-25 13:38:01,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +13: [2023-05-25 13:38:01,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... +20: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... +16: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +13: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... +22: [2023-05-25 13:38:01,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +22: [2023-05-25 13:38:01,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +29: [2023-05-25 13:38:01,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:01,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +13: [2023-05-25 13:38:01,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +21: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt... +21: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... + 3: [2023-05-25 13:38:01,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +23: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +17: [2023-05-25 13:38:01,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt... +17: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_00-model_states.pt. +23: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +23: [2023-05-25 13:38:01,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:01,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +16: [2023-05-25 13:38:01,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:01,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +22: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +29: [2023-05-25 13:38:01,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +12: [2023-05-25 13:38:01,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... + 3: [2023-05-25 13:38:01,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. +29: [2023-05-25 13:38:01,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +12: [2023-05-25 13:38:01,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... +12: [2023-05-25 13:38:01,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... +19: [2023-05-25 13:38:01,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +12: [2023-05-25 13:38:01,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +19: [2023-05-25 13:38:01,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +19: [2023-05-25 13:38:01,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:01,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +21: [2023-05-25 13:38:01,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:01,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +17: [2023-05-25 13:38:01,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:01,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:01,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +20: [2023-05-25 13:38:01,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_28-model_01-model_states.pt. +21: [2023-05-25 13:38:01,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:01,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +17: [2023-05-25 13:38:01,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +21: [2023-05-25 13:38:01,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +16: [2023-05-25 13:38:01,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:01,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +22: [2023-05-25 13:38:01,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +16: [2023-05-25 13:38:01,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:01,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:01,242] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +20: [2023-05-25 13:38:01,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:01,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +20: [2023-05-25 13:38:01,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt... +28: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +28: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +20: [2023-05-25 13:38:01,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +28: [2023-05-25 13:38:01,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:01,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +20: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +20: [2023-05-25 13:38:01,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt... +12: [2023-05-25 13:38:01,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:01,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:01,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:01,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:01,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +25: [2023-05-25 13:38:01,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +25: [2023-05-25 13:38:01,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +10: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:01,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:01,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +31: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +31: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. +31: [2023-05-25 13:38:01,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +14: [2023-05-25 13:38:01,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. +14: [2023-05-25 13:38:01,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +31: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +25: [2023-05-25 13:38:01,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:01,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +12: [2023-05-25 13:38:01,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +21: [2023-05-25 13:38:01,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +21: [2023-05-25 13:38:01,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +16: [2023-05-25 13:38:01,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +16: [2023-05-25 13:38:01,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +15: [2023-05-25 13:38:01,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:01,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:01,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +24: [2023-05-25 13:38:01,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +24: [2023-05-25 13:38:01,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +30: [2023-05-25 13:38:01,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +30: [2023-05-25 13:38:01,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +31: [2023-05-25 13:38:01,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +12: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +26: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +26: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +12: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +21: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +17: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +16: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +27: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +29: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +27: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +15: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +16: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +31: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_03-model_states.pt. +15: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +30: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:01,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:01,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +10: [2023-05-25 13:38:01,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:01,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +30: [2023-05-25 13:38:01,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +19: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +12: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +27: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +18: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +19: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +23: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. +18: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +27: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +17: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +22: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +30: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +23: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +30: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +22: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +27: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +30: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +30: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:01,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +26: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +10: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +14: [2023-05-25 13:38:01,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:01,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:01,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:01,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +19: [2023-05-25 13:38:01,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +27: [2023-05-25 13:38:01,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +23: [2023-05-25 13:38:01,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:01,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +27: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +13: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +18: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +13: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +22: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:01,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:01,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. +20: [2023-05-25 13:38:01,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. +22: [2023-05-25 13:38:01,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +26: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +30: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +30: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +14: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +26: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +14: [2023-05-25 13:38:01,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +25: [2023-05-25 13:38:01,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +11: [2023-05-25 13:38:01,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +25: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +11: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +25: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +18: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. +25: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +18: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. +11: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +28: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +28: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +25: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +25: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +27: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +27: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +25: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +25: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +13: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,383] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +20: [2023-05-25 13:38:01,383] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +30: [2023-05-25 13:38:01,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +13: [2023-05-25 13:38:01,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +30: [2023-05-25 13:38:01,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. +13: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. +30: [2023-05-25 13:38:01,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +24: [2023-05-25 13:38:01,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +20: [2023-05-25 13:38:01,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +30: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:01,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:01,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +26: [2023-05-25 13:38:01,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +24: [2023-05-25 13:38:01,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +28: [2023-05-25 13:38:01,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:01,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +24: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +24: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +30: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +18: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +12: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +12: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +18: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +25: [2023-05-25 13:38:01,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:01,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:01,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:01,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:01,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:01,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:01,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +30: [2023-05-25 13:38:01,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +31: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +10: [2023-05-25 13:38:01,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +10: [2023-05-25 13:38:01,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +31: [2023-05-25 13:38:01,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... +31: [2023-05-25 13:38:01,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... +31: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +30: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +27: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +27: [2023-05-25 13:38:01,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +25: [2023-05-25 13:38:01,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +31: [2023-05-25 13:38:01,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +12: [2023-05-25 13:38:01,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:01,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +24: [2023-05-25 13:38:01,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +15: [2023-05-25 13:38:01,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +24: [2023-05-25 13:38:01,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +15: [2023-05-25 13:38:01,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +11: [2023-05-25 13:38:01,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +26: [2023-05-25 13:38:01,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +26: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_02-model_states.pt. +26: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:01,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:01,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:01,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:01,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +10: [2023-05-25 13:38:01,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:01,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +11: [2023-05-25 13:38:01,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +26: [2023-05-25 13:38:01,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:01,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... +26: [2023-05-25 13:38:01,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... +30: [2023-05-25 13:38:01,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +10: [2023-05-25 13:38:01,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +26: [2023-05-25 13:38:01,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +27: [2023-05-25 13:38:01,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +26: [2023-05-25 13:38:01,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +26: [2023-05-25 13:38:01,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +11: [2023-05-25 13:38:01,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +22: [2023-05-25 13:38:01,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. +22: [2023-05-25 13:38:01,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. +25: [2023-05-25 13:38:01,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +30: [2023-05-25 13:38:01,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. +24: [2023-05-25 13:38:01,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. +31: [2023-05-25 13:38:01,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:01,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +11: [2023-05-25 13:38:01,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:01,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +27: [2023-05-25 13:38:01,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:01,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +15: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +27: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +13: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +13: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +13: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +13: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. +25: [2023-05-25 13:38:01,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:01,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +11: [2023-05-25 13:38:01,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. +11: [2023-05-25 13:38:01,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:01,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +14: [2023-05-25 13:38:01,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +14: [2023-05-25 13:38:01,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +29: [2023-05-25 13:38:01,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +11: [2023-05-25 13:38:01,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_02-model_states.pt. +13: [2023-05-25 13:38:01,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... +11: [2023-05-25 13:38:01,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. +10: [2023-05-25 13:38:01,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +30: [2023-05-25 13:38:01,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:01,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:01,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +13: [2023-05-25 13:38:01,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... +22: [2023-05-25 13:38:01,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +30: [2023-05-25 13:38:01,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +15: [2023-05-25 13:38:01,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +22: [2023-05-25 13:38:01,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:01,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:01,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. +24: [2023-05-25 13:38:01,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:01,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +25: [2023-05-25 13:38:01,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +31: [2023-05-25 13:38:01,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +23: [2023-05-25 13:38:01,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +23: [2023-05-25 13:38:01,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +28: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... + 8: [2023-05-25 13:38:01,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +28: [2023-05-25 13:38:01,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +28: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +28: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +28: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +28: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +13: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. +13: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. +27: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +27: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +31: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +14: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +14: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +24: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +21: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. +12: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +21: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. +24: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +29: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +10: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +14: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +24: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +11: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:01,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:01,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +21: [2023-05-25 13:38:01,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:01,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:01,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +12: [2023-05-25 13:38:01,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... + 8: [2023-05-25 13:38:01,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:01,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +12: [2023-05-25 13:38:01,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +21: [2023-05-25 13:38:01,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +14: [2023-05-25 13:38:01,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +24: [2023-05-25 13:38:01,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +21: [2023-05-25 13:38:01,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +10: [2023-05-25 13:38:01,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:01,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:01,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +11: [2023-05-25 13:38:01,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +26: [2023-05-25 13:38:01,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +15: [2023-05-25 13:38:01,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +10: [2023-05-25 13:38:01,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +31: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +23: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +21: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +21: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +24: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +23: [2023-05-25 13:38:01,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +15: [2023-05-25 13:38:01,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +26: [2023-05-25 13:38:01,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:01,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:01,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +19: [2023-05-25 13:38:01,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. +19: [2023-05-25 13:38:01,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. +31: [2023-05-25 13:38:01,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. +27: [2023-05-25 13:38:01,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:01,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +14: [2023-05-25 13:38:01,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +26: [2023-05-25 13:38:01,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:01,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +26: [2023-05-25 13:38:01,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +24: [2023-05-25 13:38:01,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:01,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:01,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +21: [2023-05-25 13:38:01,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:01,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +13: [2023-05-25 13:38:01,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:01,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +13: [2023-05-25 13:38:01,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +14: [2023-05-25 13:38:01,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +24: [2023-05-25 13:38:01,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +16: [2023-05-25 13:38:01,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:01,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:01,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:01,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +15: [2023-05-25 13:38:01,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +31: [2023-05-25 13:38:01,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +17: [2023-05-25 13:38:01,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +17: [2023-05-25 13:38:01,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +16: [2023-05-25 13:38:01,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:01,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +16: [2023-05-25 13:38:01,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. +16: [2023-05-25 13:38:01,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. +11: [2023-05-25 13:38:01,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +16: [2023-05-25 13:38:01,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +16: [2023-05-25 13:38:01,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +31: [2023-05-25 13:38:01,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... + 9: [2023-05-25 13:38:01,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +29: [2023-05-25 13:38:01,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +16: [2023-05-25 13:38:01,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +15: [2023-05-25 13:38:01,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +21: [2023-05-25 13:38:01,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +11: [2023-05-25 13:38:01,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +11: [2023-05-25 13:38:01,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +26: [2023-05-25 13:38:01,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +10: [2023-05-25 13:38:01,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. +10: [2023-05-25 13:38:01,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +18: [2023-05-25 13:38:01,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:01,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:01,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:01,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:01,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:01,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +18: [2023-05-25 13:38:01,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:01,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +18: [2023-05-25 13:38:01,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +18: [2023-05-25 13:38:01,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +18: [2023-05-25 13:38:01,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +18: [2023-05-25 13:38:01,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:01,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +19: [2023-05-25 13:38:01,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +20: [2023-05-25 13:38:01,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. +20: [2023-05-25 13:38:01,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +15: [2023-05-25 13:38:01,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +11: [2023-05-25 13:38:01,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... +30: [2023-05-25 13:38:01,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +19: [2023-05-25 13:38:01,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +30: [2023-05-25 13:38:01,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +26: [2023-05-25 13:38:01,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +12: [2023-05-25 13:38:01,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. +21: [2023-05-25 13:38:01,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. +28: [2023-05-25 13:38:01,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:01,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. +12: [2023-05-25 13:38:01,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. +15: [2023-05-25 13:38:01,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +15: [2023-05-25 13:38:01,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. +25: [2023-05-25 13:38:01,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +15: [2023-05-25 13:38:01,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. +17: [2023-05-25 13:38:01,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:01,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:01,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +25: [2023-05-25 13:38:01,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +14: [2023-05-25 13:38:01,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +14: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +16: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +16: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +23: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +14: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +23: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +29: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +23: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +11: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +23: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. +29: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +14: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. +10: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:01,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +14: [2023-05-25 13:38:01,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +23: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +23: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. + 2: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +22: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +13: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +23: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +23: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... +29: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +17: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +17: [2023-05-25 13:38:01,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +22: [2023-05-25 13:38:01,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +22: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +10: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +17: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +17: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +22: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +29: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +21: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +30: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +22: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_03-model_states.pt. +20: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +30: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +22: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +22: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +11: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... +27: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +16: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +28: [2023-05-25 13:38:01,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:01,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +22: [2023-05-25 13:38:01,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +12: [2023-05-25 13:38:01,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +21: [2023-05-25 13:38:01,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +28: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +11: [2023-05-25 13:38:01,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +27: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... +10: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +10: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +27: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... +29: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt... +13: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... +12: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:01,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +29: [2023-05-25 13:38:01,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt... +20: [2023-05-25 13:38:01,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +24: [2023-05-25 13:38:01,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. +13: [2023-05-25 13:38:01,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +29: [2023-05-25 13:38:01,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:01,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +15: [2023-05-25 13:38:01,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +14: [2023-05-25 13:38:01,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +18: [2023-05-25 13:38:01,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +11: [2023-05-25 13:38:01,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. + 1: [2023-05-25 13:38:01,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:01,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +15: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. + 1: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +15: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:01,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... +29: [2023-05-25 13:38:01,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:01,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +21: [2023-05-25 13:38:01,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:01,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +19: [2023-05-25 13:38:01,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. +14: [2023-05-25 13:38:01,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:01,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +14: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_06-model_01-model_states.pt. +14: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... +21: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +28: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +28: [2023-05-25 13:38:01,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_40-model_01-model_states.pt. +15: [2023-05-25 13:38:01,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... + 8: [2023-05-25 13:38:01,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:01,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +14: [2023-05-25 13:38:01,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +19: [2023-05-25 13:38:01,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +29: [2023-05-25 13:38:01,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +14: [2023-05-25 13:38:01,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... +18: [2023-05-25 13:38:01,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +15: [2023-05-25 13:38:01,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +19: [2023-05-25 13:38:01,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +22: [2023-05-25 13:38:01,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +22: [2023-05-25 13:38:01,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. +19: [2023-05-25 13:38:01,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +16: [2023-05-25 13:38:01,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:01,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. +17: [2023-05-25 13:38:01,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +15: [2023-05-25 13:38:01,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt... +11: [2023-05-25 13:38:01,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:01,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... +18: [2023-05-25 13:38:01,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +19: [2023-05-25 13:38:01,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +23: [2023-05-25 13:38:01,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... +16: [2023-05-25 13:38:01,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +11: [2023-05-25 13:38:01,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 3: [2023-05-25 13:38:01,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +16: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +11: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +30: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +30: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +16: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... +11: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +17: [2023-05-25 13:38:01,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... +10: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 3: [2023-05-25 13:38:01,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... +15: [2023-05-25 13:38:01,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... +23: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +10: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... +10: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... +30: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +15: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +17: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +17: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +20: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... +30: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +18: [2023-05-25 13:38:01,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +20: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +20: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +17: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +22: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +20: [2023-05-25 13:38:01,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +25: [2023-05-25 13:38:01,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +20: [2023-05-25 13:38:01,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt... +22: [2023-05-25 13:38:01,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... +12: [2023-05-25 13:38:01,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +17: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt... +16: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt... +22: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:01,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +29: [2023-05-25 13:38:01,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:01,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:01,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... +16: [2023-05-25 13:38:01,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt... +12: [2023-05-25 13:38:01,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:01,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... + 4: [2023-05-25 13:38:01,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +29: [2023-05-25 13:38:01,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +25: [2023-05-25 13:38:01,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +22: [2023-05-25 13:38:01,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +12: [2023-05-25 13:38:01,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +19: [2023-05-25 13:38:01,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +15: [2023-05-25 13:38:01,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +17: [2023-05-25 13:38:01,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +14: [2023-05-25 13:38:01,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +14: [2023-05-25 13:38:01,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:01,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... +14: [2023-05-25 13:38:01,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... +23: [2023-05-25 13:38:01,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... +14: [2023-05-25 13:38:01,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +19: [2023-05-25 13:38:01,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:01,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. +16: [2023-05-25 13:38:01,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +19: [2023-05-25 13:38:01,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +21: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. +21: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... +19: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. +17: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... +18: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. +20: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. +18: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. +12: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +20: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... +23: [2023-05-25 13:38:01,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +23: [2023-05-25 13:38:01,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_29-model_03-model_states.pt. +12: [2023-05-25 13:38:01,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +17: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. +13: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_18-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 4: [2023-05-25 13:38:01,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +22: [2023-05-25 13:38:01,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +17: [2023-05-25 13:38:01,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +19: [2023-05-25 13:38:01,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... +28: [2023-05-25 13:38:01,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:01,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +17: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt... +29: [2023-05-25 13:38:01,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +20: [2023-05-25 13:38:01,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... +12: [2023-05-25 13:38:01,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +19: [2023-05-25 13:38:01,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:01,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +18: [2023-05-25 13:38:01,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:01,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +23: [2023-05-25 13:38:01,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:01,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... +20: [2023-05-25 13:38:01,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +28: [2023-05-25 13:38:01,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... +28: [2023-05-25 13:38:01,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt... +21: [2023-05-25 13:38:01,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +19: [2023-05-25 13:38:01,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +12: [2023-05-25 13:38:01,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... +19: [2023-05-25 13:38:01,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:01,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:01,536] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,536] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. +13: [2023-05-25 13:38:01,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... +20: [2023-05-25 13:38:01,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt... +13: [2023-05-25 13:38:01,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +22: [2023-05-25 13:38:01,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +22: [2023-05-25 13:38:01,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt... + 3: [2023-05-25 13:38:01,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +20: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +17: [2023-05-25 13:38:01,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... +17: [2023-05-25 13:38:01,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +20: [2023-05-25 13:38:01,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +19: [2023-05-25 13:38:01,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +12: [2023-05-25 13:38:01,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +19: [2023-05-25 13:38:01,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +18: [2023-05-25 13:38:01,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +12: [2023-05-25 13:38:01,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +18: [2023-05-25 13:38:01,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +20: [2023-05-25 13:38:01,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:01,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:01,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:01,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +23: [2023-05-25 13:38:01,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +16: [2023-05-25 13:38:01,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:01,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +20: [2023-05-25 13:38:01,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +16: [2023-05-25 13:38:01,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:01,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:01,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:01,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +18: [2023-05-25 13:38:01,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +16: [2023-05-25 13:38:01,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +21: [2023-05-25 13:38:01,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +19: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +12: [2023-05-25 13:38:01,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +23: [2023-05-25 13:38:01,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +19: [2023-05-25 13:38:01,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +13: [2023-05-25 13:38:01,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +13: [2023-05-25 13:38:01,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:01,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +13: [2023-05-25 13:38:01,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +13: [2023-05-25 13:38:01,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt... +20: [2023-05-25 13:38:01,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:01,573] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt... +31: [2023-05-25 13:38:01,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:01,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:01,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +25: [2023-05-25 13:38:01,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +30: [2023-05-25 13:38:01,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +30: [2023-05-25 13:38:01,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +30: [2023-05-25 13:38:01,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:01,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:01,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:01,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:01,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:01,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +26: [2023-05-25 13:38:01,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +26: [2023-05-25 13:38:01,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +30: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:01,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:01,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:01,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:01,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +31: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +31: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +28: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +27: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:01,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +24: [2023-05-25 13:38:01,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +26: [2023-05-25 13:38:01,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +26: [2023-05-25 13:38:01,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +11: [2023-05-25 13:38:01,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:01,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +25: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +25: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +25: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:01,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +11: [2023-05-25 13:38:01,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +25: [2023-05-25 13:38:01,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +28: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:01,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +28: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +28: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +28: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:01,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +24: [2023-05-25 13:38:01,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:01,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:01,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +24: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +26: [2023-05-25 13:38:01,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +13: [2023-05-25 13:38:01,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +26: [2023-05-25 13:38:01,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +13: [2023-05-25 13:38:01,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +25: [2023-05-25 13:38:01,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:01,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +25: [2023-05-25 13:38:01,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:01,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:01,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:01,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +26: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +25: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +26: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +26: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +26: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +30: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:01,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:01,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +25: [2023-05-25 13:38:01,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +29: [2023-05-25 13:38:01,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +26: [2023-05-25 13:38:01,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:01,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +30: [2023-05-25 13:38:01,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +24: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +13: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +10: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +24: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +27: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +30: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +12: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +10: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +26: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +12: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +30: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:01,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +26: [2023-05-25 13:38:01,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:01,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +26: [2023-05-25 13:38:01,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +14: [2023-05-25 13:38:01,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +30: [2023-05-25 13:38:01,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +14: [2023-05-25 13:38:01,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +26: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +11: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +26: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +31: [2023-05-25 13:38:01,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:01,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +31: [2023-05-25 13:38:01,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:01,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +27: [2023-05-25 13:38:01,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:01,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +31: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +10: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +26: [2023-05-25 13:38:01,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:01,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +10: [2023-05-25 13:38:01,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:01,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +26: [2023-05-25 13:38:01,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +10: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +10: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +10: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +12: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +24: [2023-05-25 13:38:01,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:01,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:01,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:01,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:01,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:01,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. +25: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. +24: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +12: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +25: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:01,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +28: [2023-05-25 13:38:01,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +28: [2023-05-25 13:38:01,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +30: [2023-05-25 13:38:01,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +11: [2023-05-25 13:38:01,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +24: [2023-05-25 13:38:01,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +14: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +24: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +24: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +14: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +28: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +28: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +21: [2023-05-25 13:38:01,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +24: [2023-05-25 13:38:01,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +11: [2023-05-25 13:38:01,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +21: [2023-05-25 13:38:01,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +25: [2023-05-25 13:38:01,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:01,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +27: [2023-05-25 13:38:01,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +29: [2023-05-25 13:38:01,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:01,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +25: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +25: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +30: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +25: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +31: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +31: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +15: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:01,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +27: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +28: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +15: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +20: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +15: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +20: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +26: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +30: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:01,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +28: [2023-05-25 13:38:01,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +26: [2023-05-25 13:38:01,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:01,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:01,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:01,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +21: [2023-05-25 13:38:01,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +24: [2023-05-25 13:38:01,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +15: [2023-05-25 13:38:01,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. +15: [2023-05-25 13:38:01,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. +26: [2023-05-25 13:38:01,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +21: [2023-05-25 13:38:01,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +26: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +24: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +25: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +26: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +30: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +27: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +31: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +26: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +29: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +29: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_03-model_states.pt. +30: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. +31: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +16: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +27: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +24: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +16: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +29: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +16: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +20: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +20: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +24: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +26: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +26: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +15: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +29: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +29: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +11: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +15: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +26: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +11: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +29: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +18: [2023-05-25 13:38:01,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +24: [2023-05-25 13:38:01,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +18: [2023-05-25 13:38:01,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +12: [2023-05-25 13:38:01,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:01,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:01,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:01,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:01,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:01,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:01,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +24: [2023-05-25 13:38:01,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +15: [2023-05-25 13:38:01,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +27: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +10: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +12: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +26: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +10: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +12: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... +12: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... +13: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... +14: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +29: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +14: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +27: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +13: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +28: [2023-05-25 13:38:01,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. +28: [2023-05-25 13:38:01,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +10: [2023-05-25 13:38:01,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... +29: [2023-05-25 13:38:01,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:01,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +17: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +30: [2023-05-25 13:38:01,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +17: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +10: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... +14: [2023-05-25 13:38:01,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:01,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:01,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +30: [2023-05-25 13:38:01,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +29: [2023-05-25 13:38:01,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +29: [2023-05-25 13:38:01,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +17: [2023-05-25 13:38:01,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. +17: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +14: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. +11: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +28: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +14: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... +18: [2023-05-25 13:38:01,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +19: [2023-05-25 13:38:01,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +19: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +27: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. +10: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +25: [2023-05-25 13:38:01,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:01,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +18: [2023-05-25 13:38:01,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +27: [2023-05-25 13:38:01,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... + 8: [2023-05-25 13:38:01,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +31: [2023-05-25 13:38:01,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. +31: [2023-05-25 13:38:01,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_41-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +31: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +25: [2023-05-25 13:38:01,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +25: [2023-05-25 13:38:01,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +28: [2023-05-25 13:38:01,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +13: [2023-05-25 13:38:01,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +30: [2023-05-25 13:38:01,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +28: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +28: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +13: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... + 8: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. +15: [2023-05-25 13:38:01,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +30: [2023-05-25 13:38:01,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +27: [2023-05-25 13:38:01,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +21: [2023-05-25 13:38:01,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +31: [2023-05-25 13:38:01,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +21: [2023-05-25 13:38:01,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +30: [2023-05-25 13:38:01,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:01,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +27: [2023-05-25 13:38:01,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +21: [2023-05-25 13:38:01,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +31: [2023-05-25 13:38:01,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +30: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +21: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +15: [2023-05-25 13:38:01,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +19: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +27: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +15: [2023-05-25 13:38:01,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +19: [2023-05-25 13:38:01,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:01,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +30: [2023-05-25 13:38:01,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +30: [2023-05-25 13:38:01,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +16: [2023-05-25 13:38:01,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +15: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +27: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +29: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +10: [2023-05-25 13:38:01,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. +10: [2023-05-25 13:38:01,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. +31: [2023-05-25 13:38:01,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt... +21: [2023-05-25 13:38:01,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:01,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +14: [2023-05-25 13:38:01,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +10: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +10: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +23: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +11: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. +23: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +11: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. +12: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. +13: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +15: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +29: [2023-05-25 13:38:01,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +20: [2023-05-25 13:38:01,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +20: [2023-05-25 13:38:01,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +21: [2023-05-25 13:38:01,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +21: [2023-05-25 13:38:01,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +15: [2023-05-25 13:38:01,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +22: [2023-05-25 13:38:01,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +22: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +14: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +29: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. +16: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. +15: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +27: [2023-05-25 13:38:01,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +15: [2023-05-25 13:38:01,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... +15: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +10: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +29: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +10: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +11: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +19: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +27: [2023-05-25 13:38:01,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +11: [2023-05-25 13:38:01,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... +14: [2023-05-25 13:38:01,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +29: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. +18: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +23: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +29: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... + 3: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. +18: [2023-05-25 13:38:01,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +12: [2023-05-25 13:38:01,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:01,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +15: [2023-05-25 13:38:01,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:01,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... +23: [2023-05-25 13:38:01,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +11: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +15: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +10: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +28: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +17: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +29: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt... +15: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... +14: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. +12: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +19: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +14: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. +12: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. + 2: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_01-model_states.pt. +19: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +13: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +13: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. +11: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:01,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:01,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... +28: [2023-05-25 13:38:01,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... +28: [2023-05-25 13:38:01,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +19: [2023-05-25 13:38:01,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +19: [2023-05-25 13:38:01,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:01,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:01,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... +14: [2023-05-25 13:38:01,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +20: [2023-05-25 13:38:01,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +29: [2023-05-25 13:38:01,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... +17: [2023-05-25 13:38:01,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:01,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +20: [2023-05-25 13:38:01,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:01,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. +22: [2023-05-25 13:38:01,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +12: [2023-05-25 13:38:01,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +19: [2023-05-25 13:38:01,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:01,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +22: [2023-05-25 13:38:01,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:01,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:01,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +19: [2023-05-25 13:38:01,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +13: [2023-05-25 13:38:01,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. +22: [2023-05-25 13:38:01,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:01,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +23: [2023-05-25 13:38:01,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +23: [2023-05-25 13:38:01,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +13: [2023-05-25 13:38:01,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_02-model_states.pt. +21: [2023-05-25 13:38:01,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. + 5: [2023-05-25 13:38:01,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +18: [2023-05-25 13:38:01,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:01,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:01,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +21: [2023-05-25 13:38:01,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +31: [2023-05-25 13:38:01,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +19: [2023-05-25 13:38:01,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +21: [2023-05-25 13:38:01,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:01,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:01,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:01,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +31: [2023-05-25 13:38:01,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... +14: [2023-05-25 13:38:01,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +12: [2023-05-25 13:38:01,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +17: [2023-05-25 13:38:01,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +19: [2023-05-25 13:38:01,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... +31: [2023-05-25 13:38:01,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +22: [2023-05-25 13:38:01,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +16: [2023-05-25 13:38:01,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +16: [2023-05-25 13:38:01,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_01-model_states.pt. +12: [2023-05-25 13:38:01,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +14: [2023-05-25 13:38:01,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:01,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +13: [2023-05-25 13:38:01,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +21: [2023-05-25 13:38:01,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:01,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +17: [2023-05-25 13:38:01,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:01,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:01,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +20: [2023-05-25 13:38:01,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:01,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:01,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:01,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:01,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. +22: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +13: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +20: [2023-05-25 13:38:01,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +20: [2023-05-25 13:38:01,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:01,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +20: [2023-05-25 13:38:01,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... +12: [2023-05-25 13:38:01,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... +15: [2023-05-25 13:38:01,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +15: [2023-05-25 13:38:01,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +22: [2023-05-25 13:38:01,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +13: [2023-05-25 13:38:01,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +23: [2023-05-25 13:38:01,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +12: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +23: [2023-05-25 13:38:01,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +23: [2023-05-25 13:38:01,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +15: [2023-05-25 13:38:01,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... +15: [2023-05-25 13:38:01,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... +23: [2023-05-25 13:38:01,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... +22: [2023-05-25 13:38:01,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +21: [2023-05-25 13:38:01,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:01,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:01,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... +21: [2023-05-25 13:38:01,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:01,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:01,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... +18: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +23: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +23: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:01,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +17: [2023-05-25 13:38:01,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +11: [2023-05-25 13:38:01,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +16: [2023-05-25 13:38:01,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +16: [2023-05-25 13:38:01,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:01,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +14: [2023-05-25 13:38:01,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +11: [2023-05-25 13:38:01,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... +18: [2023-05-25 13:38:01,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +14: [2023-05-25 13:38:01,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +18: [2023-05-25 13:38:01,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. +18: [2023-05-25 13:38:01,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:01,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +10: [2023-05-25 13:38:01,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:01,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:01,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:01,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... +18: [2023-05-25 13:38:01,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... +16: [2023-05-25 13:38:01,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:01,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +18: [2023-05-25 13:38:01,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +11: [2023-05-25 13:38:01,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +18: [2023-05-25 13:38:01,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:01,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +19: [2023-05-25 13:38:01,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +10: [2023-05-25 13:38:01,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... +10: [2023-05-25 13:38:01,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... +10: [2023-05-25 13:38:01,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... +10: [2023-05-25 13:38:01,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... +11: [2023-05-25 13:38:01,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +23: [2023-05-25 13:38:01,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +23: [2023-05-25 13:38:01,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +23: [2023-05-25 13:38:01,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +23: [2023-05-25 13:38:01,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +19: [2023-05-25 13:38:01,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:01,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... +20: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +19: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... +19: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:01,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... +22: [2023-05-25 13:38:01,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... + 9: [2023-05-25 13:38:01,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... +19: [2023-05-25 13:38:01,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +20: [2023-05-25 13:38:01,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:01,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. +19: [2023-05-25 13:38:01,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +17: [2023-05-25 13:38:01,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... +14: [2023-05-25 13:38:01,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +19: [2023-05-25 13:38:01,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +12: [2023-05-25 13:38:01,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +19: [2023-05-25 13:38:01,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +20: [2023-05-25 13:38:01,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... +17: [2023-05-25 13:38:01,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:01,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +14: [2023-05-25 13:38:01,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... +12: [2023-05-25 13:38:01,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... +14: [2023-05-25 13:38:01,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... +18: [2023-05-25 13:38:01,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +14: [2023-05-25 13:38:01,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +22: [2023-05-25 13:38:01,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +23: [2023-05-25 13:38:01,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +12: [2023-05-25 13:38:01,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +18: [2023-05-25 13:38:01,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. +13: [2023-05-25 13:38:01,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:01,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... +23: [2023-05-25 13:38:01,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +18: [2023-05-25 13:38:01,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +12: [2023-05-25 13:38:01,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... +13: [2023-05-25 13:38:01,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... + 9: [2023-05-25 13:38:01,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +23: [2023-05-25 13:38:01,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... +14: [2023-05-25 13:38:01,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +23: [2023-05-25 13:38:01,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +20: [2023-05-25 13:38:01,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:01,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. +16: [2023-05-25 13:38:01,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... +23: [2023-05-25 13:38:01,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +23: [2023-05-25 13:38:01,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:01,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +22: [2023-05-25 13:38:01,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:01,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +19: [2023-05-25 13:38:01,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +19: [2023-05-25 13:38:01,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:01,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:01,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:01,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +12: [2023-05-25 13:38:01,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:01,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +22: [2023-05-25 13:38:01,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... +22: [2023-05-25 13:38:01,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:01,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:01,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:01,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:01,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +19: [2023-05-25 13:38:01,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. +13: [2023-05-25 13:38:01,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... +17: [2023-05-25 13:38:01,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... +17: [2023-05-25 13:38:01,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... +20: [2023-05-25 13:38:01,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +12: [2023-05-25 13:38:01,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... +12: [2023-05-25 13:38:01,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt... + 9: [2023-05-25 13:38:01,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_03-model_states.pt. +22: [2023-05-25 13:38:01,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt... +22: [2023-05-25 13:38:01,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +21: [2023-05-25 13:38:01,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... +21: [2023-05-25 13:38:01,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... +11: [2023-05-25 13:38:01,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. +11: [2023-05-25 13:38:01,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_19-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. +18: [2023-05-25 13:38:01,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:01,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:01,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +18: [2023-05-25 13:38:01,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +18: [2023-05-25 13:38:01,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +23: [2023-05-25 13:38:01,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:01,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... +20: [2023-05-25 13:38:01,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:01,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... +18: [2023-05-25 13:38:01,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. +16: [2023-05-25 13:38:01,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:01,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:01,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. +23: [2023-05-25 13:38:01,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:01,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +16: [2023-05-25 13:38:01,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +16: [2023-05-25 13:38:01,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... +18: [2023-05-25 13:38:01,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:01,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt... +22: [2023-05-25 13:38:01,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... + 3: [2023-05-25 13:38:01,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +11: [2023-05-25 13:38:01,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +20: [2023-05-25 13:38:01,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +14: [2023-05-25 13:38:01,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +14: [2023-05-25 13:38:01,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:01,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. +23: [2023-05-25 13:38:01,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt... +22: [2023-05-25 13:38:01,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +17: [2023-05-25 13:38:01,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +14: [2023-05-25 13:38:01,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... +14: [2023-05-25 13:38:01,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. + 2: [2023-05-25 13:38:01,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_03-model_states.pt. +18: [2023-05-25 13:38:01,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:01,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +17: [2023-05-25 13:38:01,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... +22: [2023-05-25 13:38:01,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +23: [2023-05-25 13:38:01,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:01,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +17: [2023-05-25 13:38:01,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:01,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:01,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:01,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +19: [2023-05-25 13:38:01,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +17: [2023-05-25 13:38:01,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +22: [2023-05-25 13:38:01,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +23: [2023-05-25 13:38:01,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +19: [2023-05-25 13:38:01,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... +19: [2023-05-25 13:38:01,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +23: [2023-05-25 13:38:01,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. +17: [2023-05-25 13:38:01,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:01,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_07-model_02-model_states.pt. +16: [2023-05-25 13:38:01,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +16: [2023-05-25 13:38:01,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +23: [2023-05-25 13:38:01,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +17: [2023-05-25 13:38:01,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... +17: [2023-05-25 13:38:01,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... +11: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... +11: [2023-05-25 13:38:01,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,908] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +11: [2023-05-25 13:38:01,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_00-model_states.pt. +16: [2023-05-25 13:38:01,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +16: [2023-05-25 13:38:01,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... +11: [2023-05-25 13:38:01,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... +30: [2023-05-25 13:38:01,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +30: [2023-05-25 13:38:01,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +30: [2023-05-25 13:38:01,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... +21: [2023-05-25 13:38:01,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +21: [2023-05-25 13:38:01,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +30: [2023-05-25 13:38:01,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:01,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +31: [2023-05-25 13:38:01,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:01,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... +31: [2023-05-25 13:38:01,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +20: [2023-05-25 13:38:01,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. +20: [2023-05-25 13:38:01,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_30-model_03-model_states.pt. + 2: [2023-05-25 13:38:01,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... +16: [2023-05-25 13:38:01,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:01,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:01,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_00-model_states.pt. +16: [2023-05-25 13:38:01,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +16: [2023-05-25 13:38:01,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt... +25: [2023-05-25 13:38:01,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +16: [2023-05-25 13:38:01,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +25: [2023-05-25 13:38:01,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:01,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +25: [2023-05-25 13:38:01,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +20: [2023-05-25 13:38:01,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:01,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt... +25: [2023-05-25 13:38:01,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:01,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +24: [2023-05-25 13:38:01,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +27: [2023-05-25 13:38:01,951] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +28: [2023-05-25 13:38:01,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +28: [2023-05-25 13:38:01,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +25: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +25: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +24: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:01,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:01,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +29: [2023-05-25 13:38:01,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +27: [2023-05-25 13:38:01,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:01,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:01,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +24: [2023-05-25 13:38:01,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +26: [2023-05-25 13:38:01,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +28: [2023-05-25 13:38:01,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:01,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:01,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:01,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +21: [2023-05-25 13:38:01,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +25: [2023-05-25 13:38:01,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +25: [2023-05-25 13:38:01,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:01,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +21: [2023-05-25 13:38:01,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:01,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +25: [2023-05-25 13:38:01,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +25: [2023-05-25 13:38:01,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +27: [2023-05-25 13:38:01,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +21: [2023-05-25 13:38:01,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +31: [2023-05-25 13:38:01,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:01,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:01,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +26: [2023-05-25 13:38:01,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +26: [2023-05-25 13:38:01,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:01,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:01,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:01,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:01,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +20: [2023-05-25 13:38:01,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:01,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_00-model_states.pt. +30: [2023-05-25 13:38:01,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +30: [2023-05-25 13:38:01,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +30: [2023-05-25 13:38:01,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:01,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +26: [2023-05-25 13:38:01,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +31: [2023-05-25 13:38:01,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +20: [2023-05-25 13:38:01,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +20: [2023-05-25 13:38:01,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt... +26: [2023-05-25 13:38:01,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +27: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +25: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +27: [2023-05-25 13:38:01,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +25: [2023-05-25 13:38:01,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +25: [2023-05-25 13:38:01,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +27: [2023-05-25 13:38:01,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:01,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +30: [2023-05-25 13:38:01,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +27: [2023-05-25 13:38:01,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +27: [2023-05-25 13:38:01,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +24: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +24: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +26: [2023-05-25 13:38:01,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +26: [2023-05-25 13:38:01,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +24: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:02,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +24: [2023-05-25 13:38:02,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:02,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +28: [2023-05-25 13:38:02,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:02,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +25: [2023-05-25 13:38:02,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:02,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +25: [2023-05-25 13:38:02,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:02,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:02,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +30: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +25: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:02,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +28: [2023-05-25 13:38:02,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +24: [2023-05-25 13:38:02,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:02,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +27: [2023-05-25 13:38:02,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:02,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +27: [2023-05-25 13:38:02,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:02,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +27: [2023-05-25 13:38:02,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:02,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +28: [2023-05-25 13:38:02,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +27: [2023-05-25 13:38:02,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +28: [2023-05-25 13:38:02,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +24: [2023-05-25 13:38:02,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:02,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +31: [2023-05-25 13:38:02,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +31: [2023-05-25 13:38:02,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +31: [2023-05-25 13:38:02,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +24: [2023-05-25 13:38:02,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +24: [2023-05-25 13:38:02,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +26: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +15: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +29: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +15: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +29: [2023-05-25 13:38:02,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +29: [2023-05-25 13:38:02,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_03-model_states.pt. +29: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +26: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +29: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +25: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +26: [2023-05-25 13:38:02,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +30: [2023-05-25 13:38:02,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:02,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:02,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +26: [2023-05-25 13:38:02,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_01-model_states.pt. +29: [2023-05-25 13:38:02,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:02,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +25: [2023-05-25 13:38:02,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:02,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:02,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:02,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +14: [2023-05-25 13:38:02,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +29: [2023-05-25 13:38:02,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +26: [2023-05-25 13:38:02,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +14: [2023-05-25 13:38:02,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +26: [2023-05-25 13:38:02,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. +25: [2023-05-25 13:38:02,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +25: [2023-05-25 13:38:02,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +28: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +31: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +16: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +26: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +26: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +30: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +16: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:02,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:02,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +16: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +15: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +31: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +15: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +19: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +29: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +25: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +19: [2023-05-25 13:38:02,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +19: [2023-05-25 13:38:02,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +10: [2023-05-25 13:38:02,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +10: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +30: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +25: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +25: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +14: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +27: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:02,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... + 6: [2023-05-25 13:38:02,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +28: [2023-05-25 13:38:02,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +14: [2023-05-25 13:38:02,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +26: [2023-05-25 13:38:02,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:02,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:02,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +29: [2023-05-25 13:38:02,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:02,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +20: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +20: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +30: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:02,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +26: [2023-05-25 13:38:02,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +24: [2023-05-25 13:38:02,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +11: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +28: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +11: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +26: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +10: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +13: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. +13: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +10: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +30: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +30: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +22: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +27: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +22: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +24: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +30: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +22: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. + 9: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +22: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +27: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +27: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +28: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +14: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +13: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +31: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +14: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +30: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +31: [2023-05-25 13:38:02,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_42-model_02-model_states.pt. +13: [2023-05-25 13:38:02,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +20: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +30: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +28: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +29: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +21: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +21: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +10: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +10: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +29: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:02,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +20: [2023-05-25 13:38:02,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +13: [2023-05-25 13:38:02,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +13: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +27: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +21: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +11: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +28: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +11: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +21: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +27: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +12: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +28: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +12: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +12: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +26: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +13: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_02-model_states.pt. +26: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:02,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:02,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +13: [2023-05-25 13:38:02,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:02,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +28: [2023-05-25 13:38:02,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:02,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +15: [2023-05-25 13:38:02,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +24: [2023-05-25 13:38:02,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +31: [2023-05-25 13:38:02,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:02,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +28: [2023-05-25 13:38:02,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +15: [2023-05-25 13:38:02,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +31: [2023-05-25 13:38:02,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +15: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +30: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +14: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +14: [2023-05-25 13:38:02,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +31: [2023-05-25 13:38:02,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +15: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. +31: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +13: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. +29: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +13: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +26: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +15: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... +24: [2023-05-25 13:38:02,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +29: [2023-05-25 13:38:02,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +19: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +31: [2023-05-25 13:38:02,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +31: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +31: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +18: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +29: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +10: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +19: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +26: [2023-05-25 13:38:02,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +16: [2023-05-25 13:38:02,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +18: [2023-05-25 13:38:02,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +12: [2023-05-25 13:38:02,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +13: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +18: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +12: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +13: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +26: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +21: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +26: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... +31: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +31: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt... +29: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +29: [2023-05-25 13:38:02,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:02,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +16: [2023-05-25 13:38:02,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:02,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. +17: [2023-05-25 13:38:02,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +17: [2023-05-25 13:38:02,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. +29: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. +15: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +15: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +10: [2023-05-25 13:38:02,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +17: [2023-05-25 13:38:02,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +10: [2023-05-25 13:38:02,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... +21: [2023-05-25 13:38:02,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +10: [2023-05-25 13:38:02,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. +15: [2023-05-25 13:38:02,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +10: [2023-05-25 13:38:02,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... +19: [2023-05-25 13:38:02,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +22: [2023-05-25 13:38:02,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:02,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +18: [2023-05-25 13:38:02,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +19: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +21: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:02,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. +23: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +23: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +23: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. +31: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +20: [2023-05-25 13:38:02,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +15: [2023-05-25 13:38:02,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +20: [2023-05-25 13:38:02,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +10: [2023-05-25 13:38:02,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +21: [2023-05-25 13:38:02,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +10: [2023-05-25 13:38:02,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +31: [2023-05-25 13:38:02,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +10: [2023-05-25 13:38:02,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +20: [2023-05-25 13:38:02,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +10: [2023-05-25 13:38:02,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +11: [2023-05-25 13:38:02,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. +11: [2023-05-25 13:38:02,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +20: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_00-model_states.pt. +15: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +18: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +20: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:02,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +20: [2023-05-25 13:38:02,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +20: [2023-05-25 13:38:02,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:02,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt... +23: [2023-05-25 13:38:02,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +22: [2023-05-25 13:38:02,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +18: [2023-05-25 13:38:02,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +10: [2023-05-25 13:38:02,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +22: [2023-05-25 13:38:02,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. +18: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. +18: [2023-05-25 13:38:02,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. +11: [2023-05-25 13:38:02,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +11: [2023-05-25 13:38:02,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +11: [2023-05-25 13:38:02,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:02,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. +22: [2023-05-25 13:38:02,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +22: [2023-05-25 13:38:02,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:02,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:02,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +19: [2023-05-25 13:38:02,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +11: [2023-05-25 13:38:02,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +19: [2023-05-25 13:38:02,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +16: [2023-05-25 13:38:02,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +11: [2023-05-25 13:38:02,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... +11: [2023-05-25 13:38:02,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... +18: [2023-05-25 13:38:02,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:02,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_01-model_states.pt. +18: [2023-05-25 13:38:02,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:02,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. + 7: [2023-05-25 13:38:02,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:02,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:02,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +15: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +23: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +17: [2023-05-25 13:38:02,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. +15: [2023-05-25 13:38:02,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +17: [2023-05-25 13:38:02,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:02,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +18: [2023-05-25 13:38:02,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +23: [2023-05-25 13:38:02,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:02,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +21: [2023-05-25 13:38:02,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_02-model_states.pt. +15: [2023-05-25 13:38:02,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +22: [2023-05-25 13:38:02,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:02,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +15: [2023-05-25 13:38:02,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +22: [2023-05-25 13:38:02,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +23: [2023-05-25 13:38:02,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:02,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +16: [2023-05-25 13:38:02,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +19: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +18: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +20: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +16: [2023-05-25 13:38:02,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:02,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:02,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... +22: [2023-05-25 13:38:02,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. +23: [2023-05-25 13:38:02,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +22: [2023-05-25 13:38:02,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +17: [2023-05-25 13:38:02,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:02,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. +17: [2023-05-25 13:38:02,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +18: [2023-05-25 13:38:02,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +21: [2023-05-25 13:38:02,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +14: [2023-05-25 13:38:02,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +21: [2023-05-25 13:38:02,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +14: [2023-05-25 13:38:02,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +17: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +14: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +18: [2023-05-25 13:38:02,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:02,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +11: [2023-05-25 13:38:02,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +14: [2023-05-25 13:38:02,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +15: [2023-05-25 13:38:02,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +20: [2023-05-25 13:38:02,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +14: [2023-05-25 13:38:02,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... +14: [2023-05-25 13:38:02,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. +12: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +18: [2023-05-25 13:38:02,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. +12: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +23: [2023-05-25 13:38:02,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +12: [2023-05-25 13:38:02,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +22: [2023-05-25 13:38:02,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +11: [2023-05-25 13:38:02,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +22: [2023-05-25 13:38:02,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +13: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +12: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +12: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +11: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +18: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +13: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... +13: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. +13: [2023-05-25 13:38:02,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +20: [2023-05-25 13:38:02,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +11: [2023-05-25 13:38:02,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... +13: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +12: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +12: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. +13: [2023-05-25 13:38:02,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +13: [2023-05-25 13:38:02,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... +10: [2023-05-25 13:38:02,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... +18: [2023-05-25 13:38:02,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... +13: [2023-05-25 13:38:02,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +13: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... +13: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... +13: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... +19: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +13: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +14: [2023-05-25 13:38:02,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +14: [2023-05-25 13:38:02,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:02,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +23: [2023-05-25 13:38:02,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:02,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +19: [2023-05-25 13:38:02,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +22: [2023-05-25 13:38:02,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +15: [2023-05-25 13:38:02,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +19: [2023-05-25 13:38:02,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +19: [2023-05-25 13:38:02,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +16: [2023-05-25 13:38:02,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +16: [2023-05-25 13:38:02,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +23: [2023-05-25 13:38:02,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... +11: [2023-05-25 13:38:02,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +23: [2023-05-25 13:38:02,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +23: [2023-05-25 13:38:02,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. +22: [2023-05-25 13:38:02,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +18: [2023-05-25 13:38:02,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +11: [2023-05-25 13:38:02,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +18: [2023-05-25 13:38:02,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. +10: [2023-05-25 13:38:02,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_03-model_states.pt. +23: [2023-05-25 13:38:02,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +11: [2023-05-25 13:38:02,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... +11: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +22: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +10: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +12: [2023-05-25 13:38:02,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:02,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +12: [2023-05-25 13:38:02,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +21: [2023-05-25 13:38:02,157] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. +21: [2023-05-25 13:38:02,157] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. +22: [2023-05-25 13:38:02,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +19: [2023-05-25 13:38:02,157] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. +16: [2023-05-25 13:38:02,157] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. + 7: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... + 8: [2023-05-25 13:38:02,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +19: [2023-05-25 13:38:02,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. +16: [2023-05-25 13:38:02,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. +16: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +16: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +14: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +17: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:02,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +14: [2023-05-25 13:38:02,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +21: [2023-05-25 13:38:02,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... +23: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +17: [2023-05-25 13:38:02,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... +16: [2023-05-25 13:38:02,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +14: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +17: [2023-05-25 13:38:02,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +16: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +16: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... +17: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... +22: [2023-05-25 13:38:02,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +19: [2023-05-25 13:38:02,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +23: [2023-05-25 13:38:02,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:02,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +19: [2023-05-25 13:38:02,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +14: [2023-05-25 13:38:02,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +17: [2023-05-25 13:38:02,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +22: [2023-05-25 13:38:02,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... +22: [2023-05-25 13:38:02,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +13: [2023-05-25 13:38:02,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +21: [2023-05-25 13:38:02,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:02,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 9: [2023-05-25 13:38:02,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +21: [2023-05-25 13:38:02,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:02,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +16: [2023-05-25 13:38:02,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:02,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +19: [2023-05-25 13:38:02,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... +22: [2023-05-25 13:38:02,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +22: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +21: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt... +13: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +16: [2023-05-25 13:38:02,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:02,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +12: [2023-05-25 13:38:02,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... +14: [2023-05-25 13:38:02,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +12: [2023-05-25 13:38:02,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +12: [2023-05-25 13:38:02,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +13: [2023-05-25 13:38:02,183] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +10: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +11: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_20-model_01-model_states.pt. +10: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +12: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 2: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... +10: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +22: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +22: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... + 8: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... +14: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +21: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +20: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... +23: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +20: [2023-05-25 13:38:02,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_03-model_states.pt. +14: [2023-05-25 13:38:02,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +19: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +19: [2023-05-25 13:38:02,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +14: [2023-05-25 13:38:02,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... +19: [2023-05-25 13:38:02,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... +23: [2023-05-25 13:38:02,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:02,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... +19: [2023-05-25 13:38:02,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +11: [2023-05-25 13:38:02,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:02,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt... +23: [2023-05-25 13:38:02,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. +16: [2023-05-25 13:38:02,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:02,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +19: [2023-05-25 13:38:02,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. +19: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +19: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +16: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +20: [2023-05-25 13:38:02,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +19: [2023-05-25 13:38:02,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... +20: [2023-05-25 13:38:02,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:02,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +16: [2023-05-25 13:38:02,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:02,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +21: [2023-05-25 13:38:02,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:02,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +17: [2023-05-25 13:38:02,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +16: [2023-05-25 13:38:02,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... +16: [2023-05-25 13:38:02,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +25: [2023-05-25 13:38:02,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +22: [2023-05-25 13:38:02,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +22: [2023-05-25 13:38:02,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt... +25: [2023-05-25 13:38:02,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +25: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +22: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +22: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... + 2: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... + 2: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +25: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +11: [2023-05-25 13:38:02,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. +20: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +20: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +11: [2023-05-25 13:38:02,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... +23: [2023-05-25 13:38:02,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +23: [2023-05-25 13:38:02,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +21: [2023-05-25 13:38:02,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +17: [2023-05-25 13:38:02,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +17: [2023-05-25 13:38:02,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:02,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. +21: [2023-05-25 13:38:02,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +18: [2023-05-25 13:38:02,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_31-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +24: [2023-05-25 13:38:02,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:02,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +11: [2023-05-25 13:38:02,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +24: [2023-05-25 13:38:02,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +24: [2023-05-25 13:38:02,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:02,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:02,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +11: [2023-05-25 13:38:02,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt... + 2: [2023-05-25 13:38:02,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:02,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +26: [2023-05-25 13:38:02,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +20: [2023-05-25 13:38:02,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:02,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +26: [2023-05-25 13:38:02,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +23: [2023-05-25 13:38:02,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:02,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +23: [2023-05-25 13:38:02,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:02,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:02,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:02,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,242] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:02,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... +20: [2023-05-25 13:38:02,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +29: [2023-05-25 13:38:02,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +29: [2023-05-25 13:38:02,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +25: [2023-05-25 13:38:02,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:02,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +29: [2023-05-25 13:38:02,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:02,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +17: [2023-05-25 13:38:02,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:02,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:02,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +20: [2023-05-25 13:38:02,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:02,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:02,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +20: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +23: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +17: [2023-05-25 13:38:02,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +17: [2023-05-25 13:38:02,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +26: [2023-05-25 13:38:02,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. +27: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +23: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +27: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +28: [2023-05-25 13:38:02,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:02,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:02,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +20: [2023-05-25 13:38:02,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:02,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +27: [2023-05-25 13:38:02,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +28: [2023-05-25 13:38:02,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +27: [2023-05-25 13:38:02,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:02,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:02,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:02,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:02,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:02,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:02,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +20: [2023-05-25 13:38:02,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +21: [2023-05-25 13:38:02,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +21: [2023-05-25 13:38:02,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +31: [2023-05-25 13:38:02,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:02,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +31: [2023-05-25 13:38:02,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +31: [2023-05-25 13:38:02,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +18: [2023-05-25 13:38:02,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_00-model_states.pt. +30: [2023-05-25 13:38:02,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +30: [2023-05-25 13:38:02,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +21: [2023-05-25 13:38:02,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +21: [2023-05-25 13:38:02,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +26: [2023-05-25 13:38:02,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +18: [2023-05-25 13:38:02,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt... +24: [2023-05-25 13:38:02,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:02,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. + 6: [2023-05-25 13:38:02,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_08-model_02-model_states.pt. +26: [2023-05-25 13:38:02,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:02,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:02,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:02,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:02,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +26: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:02,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +29: [2023-05-25 13:38:02,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +24: [2023-05-25 13:38:02,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +24: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +25: [2023-05-25 13:38:02,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_12_optim_states.pt... +25: [2023-05-25 13:38:02,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_12_optim_states.pt... +29: [2023-05-25 13:38:02,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +29: [2023-05-25 13:38:02,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +29: [2023-05-25 13:38:02,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:02,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_12_optim_states.pt... +26: [2023-05-25 13:38:02,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_12_optim_states.pt... + 3: [2023-05-25 13:38:02,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +29: [2023-05-25 13:38:02,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +28: [2023-05-25 13:38:02,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:02,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:02,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_12_optim_states.pt... +24: [2023-05-25 13:38:02,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_12_optim_states.pt... +26: [2023-05-25 13:38:02,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +26: [2023-05-25 13:38:02,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +24: [2023-05-25 13:38:02,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +24: [2023-05-25 13:38:02,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +30: [2023-05-25 13:38:02,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:02,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:02,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +25: [2023-05-25 13:38:02,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +29: [2023-05-25 13:38:02,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +31: [2023-05-25 13:38:02,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:02,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +28: [2023-05-25 13:38:02,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +28: [2023-05-25 13:38:02,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +28: [2023-05-25 13:38:02,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +27: [2023-05-25 13:38:02,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:02,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:02,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +24: [2023-05-25 13:38:02,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +29: [2023-05-25 13:38:02,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:02,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +27: [2023-05-25 13:38:02,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +31: [2023-05-25 13:38:02,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:02,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +28: [2023-05-25 13:38:02,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +31: [2023-05-25 13:38:02,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_00-model_states.pt. +26: [2023-05-25 13:38:02,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +28: [2023-05-25 13:38:02,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:02,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:02,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt... +31: [2023-05-25 13:38:02,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +31: [2023-05-25 13:38:02,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:02,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:02,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +30: [2023-05-25 13:38:02,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +24: [2023-05-25 13:38:02,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +30: [2023-05-25 13:38:02,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:02,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +20: [2023-05-25 13:38:02,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +29: [2023-05-25 13:38:02,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:02,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:02,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:02,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:02,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:02,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:02,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:02,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:02,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:02,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +15: [2023-05-25 13:38:02,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +26: [2023-05-25 13:38:02,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +15: [2023-05-25 13:38:02,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +29: [2023-05-25 13:38:02,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +20: [2023-05-25 13:38:02,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +15: [2023-05-25 13:38:02,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +28: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +28: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +30: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +30: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +15: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +28: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:02,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +30: [2023-05-25 13:38:02,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +28: [2023-05-25 13:38:02,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:02,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:02,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:02,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +24: [2023-05-25 13:38:02,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +28: [2023-05-25 13:38:02,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:02,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +26: [2023-05-25 13:38:02,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +31: [2023-05-25 13:38:02,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:02,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:02,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:02,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:02,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:02,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:02,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:02,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:02,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +28: [2023-05-25 13:38:02,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:02,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_12_optim_states.pt... +29: [2023-05-25 13:38:02,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_12_optim_states.pt... +31: [2023-05-25 13:38:02,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +27: [2023-05-25 13:38:02,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:02,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:02,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +29: [2023-05-25 13:38:02,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +29: [2023-05-25 13:38:02,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +27: [2023-05-25 13:38:02,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:02,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:02,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +20: [2023-05-25 13:38:02,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +27: [2023-05-25 13:38:02,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +25: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +28: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_12_optim_states.pt... +28: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_12_optim_states.pt... +30: [2023-05-25 13:38:02,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +30: [2023-05-25 13:38:02,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +31: [2023-05-25 13:38:02,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_12_optim_states.pt... +31: [2023-05-25 13:38:02,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_12_optim_states.pt... +29: [2023-05-25 13:38:02,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +20: [2023-05-25 13:38:02,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +30: [2023-05-25 13:38:02,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +22: [2023-05-25 13:38:02,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. +29: [2023-05-25 13:38:02,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +22: [2023-05-25 13:38:02,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +30: [2023-05-25 13:38:02,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:02,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +29: [2023-05-25 13:38:02,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +30: [2023-05-25 13:38:02,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +30: [2023-05-25 13:38:02,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_12_optim_states.pt... +30: [2023-05-25 13:38:02,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_12_optim_states.pt... +27: [2023-05-25 13:38:02,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_12_optim_states.pt... +27: [2023-05-25 13:38:02,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_12_optim_states.pt... +31: [2023-05-25 13:38:02,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +26: [2023-05-25 13:38:02,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:02,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:02,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +27: [2023-05-25 13:38:02,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_01-model_states.pt. +30: [2023-05-25 13:38:02,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +31: [2023-05-25 13:38:02,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_03-model_states.pt. +11: [2023-05-25 13:38:02,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:02,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +31: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +24: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +25: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +26: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +11: [2023-05-25 13:38:02,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +31: [2023-05-25 13:38:02,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:02,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:02,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:02,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +11: [2023-05-25 13:38:02,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +27: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +24: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +24: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... +24: [2023-05-25 13:38:02,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +24: [2023-05-25 13:38:02,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_43-model_02-model_states.pt. +22: [2023-05-25 13:38:02,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +24: [2023-05-25 13:38:02,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:02,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... +30: [2023-05-25 13:38:02,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +24: [2023-05-25 13:38:02,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +30: [2023-05-25 13:38:02,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. +18: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +26: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +15: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:02,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +22: [2023-05-25 13:38:02,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +18: [2023-05-25 13:38:02,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +10: [2023-05-25 13:38:02,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:02,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +19: [2023-05-25 13:38:02,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +19: [2023-05-25 13:38:02,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +28: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +25: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +10: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +26: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +17: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +17: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +28: [2023-05-25 13:38:02,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +28: [2023-05-25 13:38:02,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +30: [2023-05-25 13:38:02,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:02,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:02,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +15: [2023-05-25 13:38:02,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +26: [2023-05-25 13:38:02,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +24: [2023-05-25 13:38:02,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:02,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:02,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +14: [2023-05-25 13:38:02,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:02,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +28: [2023-05-25 13:38:02,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:02,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:02,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:02,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +24: [2023-05-25 13:38:02,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +14: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +24: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +28: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +15: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +15: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +14: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +27: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +13: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +28: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +13: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +28: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +28: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +27: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +13: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +13: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +28: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +28: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... +27: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +18: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +26: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +18: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:02,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:02,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +19: [2023-05-25 13:38:02,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +15: [2023-05-25 13:38:02,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +18: [2023-05-25 13:38:02,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +18: [2023-05-25 13:38:02,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +26: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... +26: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +30: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +15: [2023-05-25 13:38:02,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +31: [2023-05-25 13:38:02,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +23: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +23: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +31: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +23: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +29: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +21: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:02,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +15: [2023-05-25 13:38:02,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +21: [2023-05-25 13:38:02,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +29: [2023-05-25 13:38:02,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... +21: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +30: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:02,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... +31: [2023-05-25 13:38:02,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +15: [2023-05-25 13:38:02,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +31: [2023-05-25 13:38:02,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +10: [2023-05-25 13:38:02,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +30: [2023-05-25 13:38:02,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +10: [2023-05-25 13:38:02,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +30: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... +12: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +31: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... +12: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +29: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +29: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. +29: [2023-05-25 13:38:02,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:02,383] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... + 6: [2023-05-25 13:38:02,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. +12: [2023-05-25 13:38:02,383] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +26: [2023-05-25 13:38:02,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:02,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +11: [2023-05-25 13:38:02,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:02,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +12: [2023-05-25 13:38:02,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +29: [2023-05-25 13:38:02,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +25: [2023-05-25 13:38:02,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +13: [2023-05-25 13:38:02,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +13: [2023-05-25 13:38:02,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +26: [2023-05-25 13:38:02,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +29: [2023-05-25 13:38:02,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +29: [2023-05-25 13:38:02,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +30: [2023-05-25 13:38:02,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:02,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +22: [2023-05-25 13:38:02,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:02,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:02,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +27: [2023-05-25 13:38:02,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +10: [2023-05-25 13:38:02,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +22: [2023-05-25 13:38:02,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +27: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... +31: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +23: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +30: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +22: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +30: [2023-05-25 13:38:02,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... +27: [2023-05-25 13:38:02,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt... +22: [2023-05-25 13:38:02,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +26: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +23: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +31: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... +24: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +16: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. +16: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:02,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +25: [2023-05-25 13:38:02,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:02,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +14: [2023-05-25 13:38:02,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. +14: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +26: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +16: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +16: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +22: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +30: [2023-05-25 13:38:02,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +10: [2023-05-25 13:38:02,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. +11: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +30: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +25: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +25: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +10: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +20: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +10: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +11: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +20: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +14: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:02,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +20: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +27: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +18: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +18: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +18: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +20: [2023-05-25 13:38:02,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +20: [2023-05-25 13:38:02,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +13: [2023-05-25 13:38:02,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:02,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +27: [2023-05-25 13:38:02,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_00-model_states.pt. +16: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +10: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +24: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +24: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +27: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +23: [2023-05-25 13:38:02,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +27: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt... +12: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +12: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. +23: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_03-model_states.pt. +13: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +15: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +14: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +15: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +13: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +14: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. +15: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +15: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. +21: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +15: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. +19: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +19: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +19: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +19: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +14: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +18: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +22: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +15: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... +18: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:02,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +15: [2023-05-25 13:38:02,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... +19: [2023-05-25 13:38:02,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +19: [2023-05-25 13:38:02,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +21: [2023-05-25 13:38:02,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_02-model_states.pt. +21: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +12: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +23: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +23: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +16: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +12: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +12: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +12: [2023-05-25 13:38:02,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +13: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +15: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +22: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +13: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +21: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +14: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +22: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +15: [2023-05-25 13:38:02,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +23: [2023-05-25 13:38:02,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:02,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +12: [2023-05-25 13:38:02,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +12: [2023-05-25 13:38:02,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +23: [2023-05-25 13:38:02,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +21: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +11: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. +11: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. +10: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:02,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:02,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... +17: [2023-05-25 13:38:02,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +21: [2023-05-25 13:38:02,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +17: [2023-05-25 13:38:02,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +21: [2023-05-25 13:38:02,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +23: [2023-05-25 13:38:02,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +10: [2023-05-25 13:38:02,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +10: [2023-05-25 13:38:02,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +16: [2023-05-25 13:38:02,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:02,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +16: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +10: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +20: [2023-05-25 13:38:02,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +13: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +23: [2023-05-25 13:38:02,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +14: [2023-05-25 13:38:02,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:02,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... + 8: [2023-05-25 13:38:02,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +23: [2023-05-25 13:38:02,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +14: [2023-05-25 13:38:02,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... +19: [2023-05-25 13:38:02,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +16: [2023-05-25 13:38:02,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +14: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +16: [2023-05-25 13:38:02,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +19: [2023-05-25 13:38:02,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +11: [2023-05-25 13:38:02,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:02,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +14: [2023-05-25 13:38:02,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... +11: [2023-05-25 13:38:02,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +21: [2023-05-25 13:38:02,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +16: [2023-05-25 13:38:02,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +17: [2023-05-25 13:38:02,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +21: [2023-05-25 13:38:02,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:02,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +20: [2023-05-25 13:38:02,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +17: [2023-05-25 13:38:02,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:02,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:02,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:02,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:02,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +16: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +12: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +12: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:02,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +17: [2023-05-25 13:38:02,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +10: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +17: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +10: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +17: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +12: [2023-05-25 13:38:02,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +12: [2023-05-25 13:38:02,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +14: [2023-05-25 13:38:02,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +13: [2023-05-25 13:38:02,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:02,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +19: [2023-05-25 13:38:02,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +14: [2023-05-25 13:38:02,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:02,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... +14: [2023-05-25 13:38:02,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +13: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +15: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +21: [2023-05-25 13:38:02,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:02,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +13: [2023-05-25 13:38:02,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +19: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:02,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:02,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +21: [2023-05-25 13:38:02,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt... +13: [2023-05-25 13:38:02,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +15: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... +10: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +15: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... +10: [2023-05-25 13:38:02,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. +18: [2023-05-25 13:38:02,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +10: [2023-05-25 13:38:02,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. +18: [2023-05-25 13:38:02,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +14: [2023-05-25 13:38:02,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. +14: [2023-05-25 13:38:02,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +22: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +11: [2023-05-25 13:38:02,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +11: [2023-05-25 13:38:02,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +12: [2023-05-25 13:38:02,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. +23: [2023-05-25 13:38:02,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +23: [2023-05-25 13:38:02,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +12: [2023-05-25 13:38:02,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +22: [2023-05-25 13:38:02,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +21: [2023-05-25 13:38:02,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +11: [2023-05-25 13:38:02,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. +11: [2023-05-25 13:38:02,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +11: [2023-05-25 13:38:02,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. +17: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. +13: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +21: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... +13: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +17: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +21: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +18: [2023-05-25 13:38:02,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +14: [2023-05-25 13:38:02,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +21: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... +14: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +11: [2023-05-25 13:38:02,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... + 8: [2023-05-25 13:38:02,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +19: [2023-05-25 13:38:02,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +17: [2023-05-25 13:38:02,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +12: [2023-05-25 13:38:02,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +22: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +19: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +12: [2023-05-25 13:38:02,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +12: [2023-05-25 13:38:02,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +17: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... +22: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +11: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. +23: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +12: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. +20: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. +20: [2023-05-25 13:38:02,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. +11: [2023-05-25 13:38:02,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +11: [2023-05-25 13:38:02,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +11: [2023-05-25 13:38:02,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +11: [2023-05-25 13:38:02,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +23: [2023-05-25 13:38:02,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. +11: [2023-05-25 13:38:02,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... +23: [2023-05-25 13:38:02,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. +13: [2023-05-25 13:38:02,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:02,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. +13: [2023-05-25 13:38:02,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:02,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. +18: [2023-05-25 13:38:02,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. +18: [2023-05-25 13:38:02,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. +22: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. +22: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +12: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +19: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:02,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:02,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:02,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +12: [2023-05-25 13:38:02,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +20: [2023-05-25 13:38:02,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:02,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. + 9: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_02-model_states.pt. +10: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +19: [2023-05-25 13:38:02,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. +19: [2023-05-25 13:38:02,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. +16: [2023-05-25 13:38:02,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_21-model_03-model_states.pt. +16: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +18: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +18: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +10: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... +10: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... +18: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +14: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +18: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... +23: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:02,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:02,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... +22: [2023-05-25 13:38:02,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:02,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +16: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +14: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +16: [2023-05-25 13:38:02,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +23: [2023-05-25 13:38:02,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... +18: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +10: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... +10: [2023-05-25 13:38:02,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +21: [2023-05-25 13:38:02,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +21: [2023-05-25 13:38:02,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... +12: [2023-05-25 13:38:02,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +18: [2023-05-25 13:38:02,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... +10: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... +12: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... + 8: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +22: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +19: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... +19: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +11: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +23: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +12: [2023-05-25 13:38:02,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +23: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +13: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... +11: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... +12: [2023-05-25 13:38:02,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... +11: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... +13: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... +13: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... +11: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt... + 6: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +20: [2023-05-25 13:38:02,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... +22: [2023-05-25 13:38:02,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +16: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:02,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... +19: [2023-05-25 13:38:02,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... +19: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +12: [2023-05-25 13:38:02,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +16: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... +21: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... +19: [2023-05-25 13:38:02,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... +12: [2023-05-25 13:38:02,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +23: [2023-05-25 13:38:02,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +20: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:02,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +12: [2023-05-25 13:38:02,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... +20: [2023-05-25 13:38:02,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... +18: [2023-05-25 13:38:02,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... +12: [2023-05-25 13:38:02,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... + 2: [2023-05-25 13:38:02,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +18: [2023-05-25 13:38:02,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:02,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +19: [2023-05-25 13:38:02,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:02,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +20: [2023-05-25 13:38:02,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_03-model_states.pt. +22: [2023-05-25 13:38:02,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +16: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +22: [2023-05-25 13:38:02,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... +16: [2023-05-25 13:38:02,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... +16: [2023-05-25 13:38:02,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... +23: [2023-05-25 13:38:02,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... +19: [2023-05-25 13:38:02,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +16: [2023-05-25 13:38:02,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:02,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... + 6: [2023-05-25 13:38:02,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +16: [2023-05-25 13:38:02,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... +19: [2023-05-25 13:38:02,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:02,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. +17: [2023-05-25 13:38:02,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_32-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. +19: [2023-05-25 13:38:02,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... +21: [2023-05-25 13:38:02,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... +21: [2023-05-25 13:38:02,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt... +22: [2023-05-25 13:38:02,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +20: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +22: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... +16: [2023-05-25 13:38:02,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +16: [2023-05-25 13:38:02,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... +21: [2023-05-25 13:38:02,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:02,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +21: [2023-05-25 13:38:02,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... +17: [2023-05-25 13:38:02,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +15: [2023-05-25 13:38:02,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:02,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +15: [2023-05-25 13:38:02,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:02,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... + 2: [2023-05-25 13:38:02,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. +17: [2023-05-25 13:38:02,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:02,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:02,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:02,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... +20: [2023-05-25 13:38:02,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... +20: [2023-05-25 13:38:02,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. +15: [2023-05-25 13:38:02,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +11: [2023-05-25 13:38:02,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:02,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:02,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. +11: [2023-05-25 13:38:02,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +10: [2023-05-25 13:38:02,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:02,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +10: [2023-05-25 13:38:02,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:02,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:02,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. + 6: [2023-05-25 13:38:02,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:02,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_00-model_states.pt. +28: [2023-05-25 13:38:02,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. +28: [2023-05-25 13:38:02,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:02,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt... + 2: [2023-05-25 13:38:02,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +15: [2023-05-25 13:38:02,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +15: [2023-05-25 13:38:02,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +28: [2023-05-25 13:38:02,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:02,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +28: [2023-05-25 13:38:02,620] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:02,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +13: [2023-05-25 13:38:02,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +13: [2023-05-25 13:38:02,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +13: [2023-05-25 13:38:02,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:02,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +25: [2023-05-25 13:38:02,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +25: [2023-05-25 13:38:02,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +30: [2023-05-25 13:38:02,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +19: [2023-05-25 13:38:02,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +19: [2023-05-25 13:38:02,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +30: [2023-05-25 13:38:02,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +14: [2023-05-25 13:38:02,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:02,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:02,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... +14: [2023-05-25 13:38:02,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:02,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +11: [2023-05-25 13:38:02,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... +26: [2023-05-25 13:38:02,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +26: [2023-05-25 13:38:02,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,639] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... +28: [2023-05-25 13:38:02,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +28: [2023-05-25 13:38:02,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +30: [2023-05-25 13:38:02,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +30: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +10: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +25: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +10: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +25: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:02,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +25: [2023-05-25 13:38:02,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +30: [2023-05-25 13:38:02,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +30: [2023-05-25 13:38:02,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:02,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +19: [2023-05-25 13:38:02,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:02,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +24: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... +24: [2023-05-25 13:38:02,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... + 2: [2023-05-25 13:38:02,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +11: [2023-05-25 13:38:02,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:02,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... +23: [2023-05-25 13:38:02,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:02,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:02,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +23: [2023-05-25 13:38:02,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +28: [2023-05-25 13:38:02,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +10: [2023-05-25 13:38:02,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +26: [2023-05-25 13:38:02,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:02,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +18: [2023-05-25 13:38:02,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +28: [2023-05-25 13:38:02,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +10: [2023-05-25 13:38:02,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +18: [2023-05-25 13:38:02,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +30: [2023-05-25 13:38:02,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. +30: [2023-05-25 13:38:02,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +26: [2023-05-25 13:38:02,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +26: [2023-05-25 13:38:02,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:02,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. + 6: [2023-05-25 13:38:02,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... +28: [2023-05-25 13:38:02,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +24: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +12: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... +26: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +26: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +28: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_13_optim_states.pt... +28: [2023-05-25 13:38:02,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_13_optim_states.pt... +12: [2023-05-25 13:38:02,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:02,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +24: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +12: [2023-05-25 13:38:02,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +25: [2023-05-25 13:38:02,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +20: [2023-05-25 13:38:02,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +20: [2023-05-25 13:38:02,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +25: [2023-05-25 13:38:02,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +27: [2023-05-25 13:38:02,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +20: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... +24: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +30: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +24: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +20: [2023-05-25 13:38:02,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:02,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +27: [2023-05-25 13:38:02,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +21: [2023-05-25 13:38:02,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +30: [2023-05-25 13:38:02,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +21: [2023-05-25 13:38:02,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:02,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. +13: [2023-05-25 13:38:02,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_09-model_02-model_states.pt. +18: [2023-05-25 13:38:02,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +13: [2023-05-25 13:38:02,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +30: [2023-05-25 13:38:02,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:02,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:02,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:02,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:02,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:02,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +30: [2023-05-25 13:38:02,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +18: [2023-05-25 13:38:02,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +28: [2023-05-25 13:38:02,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +26: [2023-05-25 13:38:02,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +26: [2023-05-25 13:38:02,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +28: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +14: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +28: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_14_optim_states.pt... +25: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_14_optim_states.pt... +25: [2023-05-25 13:38:02,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. +30: [2023-05-25 13:38:02,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_14_optim_states.pt... +30: [2023-05-25 13:38:02,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_14_optim_states.pt... +25: [2023-05-25 13:38:02,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. +30: [2023-05-25 13:38:02,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +28: [2023-05-25 13:38:02,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +28: [2023-05-25 13:38:02,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +30: [2023-05-25 13:38:02,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +10: [2023-05-25 13:38:02,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. +10: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +24: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +28: [2023-05-25 13:38:02,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +28: [2023-05-25 13:38:02,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +24: [2023-05-25 13:38:02,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +27: [2023-05-25 13:38:02,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:02,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:02,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt... +27: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +27: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +25: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +25: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +23: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +22: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +22: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +21: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +14: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +18: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +27: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +25: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +10: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +25: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +18: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +25: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +10: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +25: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +31: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +18: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:02,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +19: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +19: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +26: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_13_optim_states.pt... +26: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_13_optim_states.pt... +19: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +13: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +19: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +24: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_13_optim_states.pt... +24: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_13_optim_states.pt... + 8: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +24: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +16: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +16: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +24: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +22: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +16: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +16: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:02,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:02,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +27: [2023-05-25 13:38:02,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +19: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +19: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +28: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_14_optim_states.pt... +28: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_14_optim_states.pt... +19: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +16: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +25: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +16: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +30: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_13_optim_states.pt... +30: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_13_optim_states.pt... +20: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +31: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. +31: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. +12: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. +23: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. +14: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. +23: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +28: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +28: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +20: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +26: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +26: [2023-05-25 13:38:02,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +20: [2023-05-25 13:38:02,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +22: [2023-05-25 13:38:02,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:02,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +31: [2023-05-25 13:38:02,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_14_optim_states.pt... +27: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_14_optim_states.pt... +21: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +31: [2023-05-25 13:38:02,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +21: [2023-05-25 13:38:02,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +21: [2023-05-25 13:38:02,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +31: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +25: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +25: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +31: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +16: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +31: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +26: [2023-05-25 13:38:02,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +24: [2023-05-25 13:38:02,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:02,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +16: [2023-05-25 13:38:02,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +23: [2023-05-25 13:38:02,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +23: [2023-05-25 13:38:02,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +31: [2023-05-25 13:38:02,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +24: [2023-05-25 13:38:02,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +17: [2023-05-25 13:38:02,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. +24: [2023-05-25 13:38:02,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +17: [2023-05-25 13:38:02,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. +24: [2023-05-25 13:38:02,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:02,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +29: [2023-05-25 13:38:02,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. +24: [2023-05-25 13:38:02,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +12: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +25: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +12: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +18: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +25: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +20: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +24: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +24: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_00-model_states.pt. +20: [2023-05-25 13:38:02,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +31: [2023-05-25 13:38:02,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... +30: [2023-05-25 13:38:02,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +30: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +31: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +14: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +27: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +29: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. +27: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt... +31: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +20: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +31: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +20: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +31: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +18: [2023-05-25 13:38:02,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +31: [2023-05-25 13:38:02,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +21: [2023-05-25 13:38:02,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +31: [2023-05-25 13:38:02,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +28: [2023-05-25 13:38:02,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:02,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +28: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +26: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +12: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +31: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +26: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +10: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +24: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +12: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. +26: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +26: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +21: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +24: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +10: [2023-05-25 13:38:02,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +18: [2023-05-25 13:38:02,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +17: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +26: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +23: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +26: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +31: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +26: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +17: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +25: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +31: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +26: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +26: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +26: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... +10: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +26: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +23: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +10: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +28: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +28: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +19: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +15: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. +26: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +26: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +15: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. +29: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:02,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:02,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:02,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +19: [2023-05-25 13:38:02,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +20: [2023-05-25 13:38:02,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +27: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +20: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +18: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +27: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +12: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +27: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +12: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +24: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +24: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +27: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +30: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +24: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +30: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_14_optim_states.pt... +31: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_14_optim_states.pt... +30: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_14_optim_states.pt... +24: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_14_optim_states.pt... +12: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +12: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +30: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +30: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +31: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +14: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +31: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +27: [2023-05-25 13:38:02,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +25: [2023-05-25 13:38:02,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_13_optim_states.pt... +25: [2023-05-25 13:38:02,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_13_optim_states.pt... +24: [2023-05-25 13:38:02,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +14: [2023-05-25 13:38:02,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +24: [2023-05-25 13:38:02,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +25: [2023-05-25 13:38:02,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_15_optim_states.pt... +29: [2023-05-25 13:38:02,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +29: [2023-05-25 13:38:02,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +18: [2023-05-25 13:38:02,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +26: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_15_optim_states.pt... +26: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_15_optim_states.pt... +27: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +26: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_14_optim_states.pt... +26: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_14_optim_states.pt... +25: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_15_optim_states.pt... +16: [2023-05-25 13:38:02,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +16: [2023-05-25 13:38:02,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +16: [2023-05-25 13:38:02,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +31: [2023-05-25 13:38:02,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_13_optim_states.pt... +31: [2023-05-25 13:38:02,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_15_optim_states.pt... +31: [2023-05-25 13:38:02,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_13_optim_states.pt... +31: [2023-05-25 13:38:02,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_15_optim_states.pt... +20: [2023-05-25 13:38:02,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +20: [2023-05-25 13:38:02,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +22: [2023-05-25 13:38:02,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:02,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +29: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +16: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +15: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +29: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +22: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_15_optim_states.pt... +30: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +30: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_15_optim_states.pt... +22: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +19: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +13: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. +16: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. +13: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. +14: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +19: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +12: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +28: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +16: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +13: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. +28: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +13: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. +29: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +29: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_02-model_states.pt. +12: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +17: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +17: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +21: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +16: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +23: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +16: [2023-05-25 13:38:02,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:02,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. +17: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +17: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +15: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. +17: [2023-05-25 13:38:02,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +23: [2023-05-25 13:38:02,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +23: [2023-05-25 13:38:02,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:02,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +23: [2023-05-25 13:38:02,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +29: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_13_optim_states.pt... +29: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_13_optim_states.pt... + 8: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +23: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +29: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +29: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +29: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +17: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +11: [2023-05-25 13:38:02,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. +17: [2023-05-25 13:38:02,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +11: [2023-05-25 13:38:02,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +29: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +13: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +29: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_02-model_states.pt. +13: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:02,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:02,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +17: [2023-05-25 13:38:02,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +13: [2023-05-25 13:38:02,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. +23: [2023-05-25 13:38:02,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. +12: [2023-05-25 13:38:02,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:02,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:02,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +23: [2023-05-25 13:38:02,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +13: [2023-05-25 13:38:02,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:02,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +15: [2023-05-25 13:38:02,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +12: [2023-05-25 13:38:02,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +12: [2023-05-25 13:38:02,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +14: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +14: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +12: [2023-05-25 13:38:02,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:02,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. +27: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_15_optim_states.pt... +27: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_15_optim_states.pt... + 5: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. +17: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. +12: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... +11: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:02,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... +17: [2023-05-25 13:38:02,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt... + 2: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +12: [2023-05-25 13:38:02,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:02,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:02,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. +11: [2023-05-25 13:38:02,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. +11: [2023-05-25 13:38:02,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +12: [2023-05-25 13:38:02,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:02,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +15: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +29: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +15: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +15: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +15: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +29: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_03-model_states.pt. +27: [2023-05-25 13:38:02,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. +27: [2023-05-25 13:38:02,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_44-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. +14: [2023-05-25 13:38:02,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:02,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +14: [2023-05-25 13:38:02,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +23: [2023-05-25 13:38:02,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. +14: [2023-05-25 13:38:02,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. +23: [2023-05-25 13:38:02,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. +17: [2023-05-25 13:38:02,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... +11: [2023-05-25 13:38:02,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +17: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +17: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +17: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... +11: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +15: [2023-05-25 13:38:02,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +13: [2023-05-25 13:38:02,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +11: [2023-05-25 13:38:02,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:02,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +23: [2023-05-25 13:38:02,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. +13: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. +13: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. +13: [2023-05-25 13:38:02,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... +23: [2023-05-25 13:38:02,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... +23: [2023-05-25 13:38:02,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +13: [2023-05-25 13:38:02,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +27: [2023-05-25 13:38:02,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:02,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +13: [2023-05-25 13:38:02,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +13: [2023-05-25 13:38:02,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... +23: [2023-05-25 13:38:02,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +29: [2023-05-25 13:38:02,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:02,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +14: [2023-05-25 13:38:02,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +14: [2023-05-25 13:38:02,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +27: [2023-05-25 13:38:02,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +29: [2023-05-25 13:38:02,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +17: [2023-05-25 13:38:02,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +27: [2023-05-25 13:38:02,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +13: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +27: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +22: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +19: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +27: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt... +29: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt... +22: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +19: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +15: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +29: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_00-model_states.pt. +22: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +29: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt... +22: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +16: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +10: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +10: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. +29: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_14_optim_states.pt... +29: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_14_optim_states.pt... +10: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_03-model_states.pt. +10: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +16: [2023-05-25 13:38:02,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +20: [2023-05-25 13:38:02,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +20: [2023-05-25 13:38:02,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +17: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +17: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:02,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +29: [2023-05-25 13:38:02,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +29: [2023-05-25 13:38:02,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_03-model_states.pt. +20: [2023-05-25 13:38:02,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +20: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +19: [2023-05-25 13:38:02,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +15: [2023-05-25 13:38:02,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +11: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. +19: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +21: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +21: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +27: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +11: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:02,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +12: [2023-05-25 13:38:02,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:02,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +12: [2023-05-25 13:38:02,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +11: [2023-05-25 13:38:02,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt... +27: [2023-05-25 13:38:02,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_46-model_01-model_states.pt. +18: [2023-05-25 13:38:02,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. +18: [2023-05-25 13:38:02,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +23: [2023-05-25 13:38:02,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +14: [2023-05-25 13:38:02,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. +23: [2023-05-25 13:38:02,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:02,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +14: [2023-05-25 13:38:02,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... +18: [2023-05-25 13:38:02,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +22: [2023-05-25 13:38:02,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +10: [2023-05-25 13:38:02,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +19: [2023-05-25 13:38:02,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. + 7: [2023-05-25 13:38:02,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_03-model_states.pt. +16: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +16: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +22: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +20: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +20: [2023-05-25 13:38:02,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:02,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:02,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:02,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +10: [2023-05-25 13:38:02,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +11: [2023-05-25 13:38:02,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:02,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:02,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +30: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_15_optim_states.pt... +10: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +30: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_15_optim_states.pt... +21: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:02,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +14: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +20: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +20: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... +19: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +16: [2023-05-25 13:38:02,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +16: [2023-05-25 13:38:02,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +18: [2023-05-25 13:38:02,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +18: [2023-05-25 13:38:02,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +13: [2023-05-25 13:38:02,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:02,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +11: [2023-05-25 13:38:02,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +18: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... +18: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +11: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... +14: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:02,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:02,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +14: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... +12: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +14: [2023-05-25 13:38:02,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... +11: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... +17: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +17: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +27: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_13_optim_states.pt... +27: [2023-05-25 13:38:02,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_13_optim_states.pt... + 7: [2023-05-25 13:38:02,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +19: [2023-05-25 13:38:02,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +19: [2023-05-25 13:38:02,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... +17: [2023-05-25 13:38:02,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... +17: [2023-05-25 13:38:02,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... +16: [2023-05-25 13:38:02,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +16: [2023-05-25 13:38:02,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:02,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +23: [2023-05-25 13:38:02,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:02,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +21: [2023-05-25 13:38:02,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. +21: [2023-05-25 13:38:02,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_33-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +11: [2023-05-25 13:38:02,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +23: [2023-05-25 13:38:02,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +11: [2023-05-25 13:38:02,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... +20: [2023-05-25 13:38:02,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... +23: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. + 9: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_22-model_02-model_states.pt. +10: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +22: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... +20: [2023-05-25 13:38:02,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... +16: [2023-05-25 13:38:02,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:02,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +10: [2023-05-25 13:38:02,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... +22: [2023-05-25 13:38:02,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +28: [2023-05-25 13:38:02,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_15_optim_states.pt... +28: [2023-05-25 13:38:02,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_15_optim_states.pt... + 1: [2023-05-25 13:38:02,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... +21: [2023-05-25 13:38:02,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +19: [2023-05-25 13:38:02,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +20: [2023-05-25 13:38:02,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +19: [2023-05-25 13:38:02,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +19: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +20: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +19: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... +29: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_15_optim_states.pt... + 5: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... +22: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +29: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_15_optim_states.pt... +22: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +16: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... +20: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +19: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... +11: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +19: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... +10: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +21: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... +10: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +20: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... +21: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +11: [2023-05-25 13:38:02,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... +10: [2023-05-25 13:38:02,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... +13: [2023-05-25 13:38:02,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. +21: [2023-05-25 13:38:02,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... +10: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... +16: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +10: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +18: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt... +16: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... +13: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +18: [2023-05-25 13:38:02,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:02,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:02,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... +18: [2023-05-25 13:38:02,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... +13: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... +20: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt... +18: [2023-05-25 13:38:02,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... +20: [2023-05-25 13:38:02,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... +13: [2023-05-25 13:38:02,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... +16: [2023-05-25 13:38:02,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... +16: [2023-05-25 13:38:02,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +16: [2023-05-25 13:38:02,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... +16: [2023-05-25 13:38:02,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... +21: [2023-05-25 13:38:02,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:02,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... +21: [2023-05-25 13:38:02,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:02,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 6: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 6: [2023-05-25 13:38:02,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 6: [2023-05-25 13:38:02,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 2: [2023-05-25 13:38:02,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +13: [2023-05-25 13:38:02,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:02,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +15: [2023-05-25 13:38:02,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:02,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:02,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:02,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:02,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +13: [2023-05-25 13:38:02,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +18: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +18: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +18: [2023-05-25 13:38:02,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:02,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +14: [2023-05-25 13:38:02,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +14: [2023-05-25 13:38:02,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +15: [2023-05-25 13:38:02,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:02,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +14: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +10: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +14: [2023-05-25 13:38:02,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:02,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:02,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:02,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:02,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:02,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +20: [2023-05-25 13:38:02,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:02,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +20: [2023-05-25 13:38:02,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +11: [2023-05-25 13:38:02,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +11: [2023-05-25 13:38:02,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +20: [2023-05-25 13:38:03,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:03,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:03,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. + 9: [2023-05-25 13:38:03,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. +10: [2023-05-25 13:38:03,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:03,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:03,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +11: [2023-05-25 13:38:03,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... +27: [2023-05-25 13:38:03,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_12_optim_states.pt. +27: [2023-05-25 13:38:03,010] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 216 +11: [2023-05-25 13:38:03,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... +20: [2023-05-25 13:38:03,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +11: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +23: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +19: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +19: [2023-05-25 13:38:03,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +14: [2023-05-25 13:38:03,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +20: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +15: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +12: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +12: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_04_optim_states.pt... +15: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_04_optim_states.pt... + 3: [2023-05-25 13:38:03,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +12: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +12: [2023-05-25 13:38:03,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. + 8: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. + 8: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,022] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +12: [2023-05-25 13:38:03,022] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +15: [2023-05-25 13:38:03,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +14: [2023-05-25 13:38:03,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +15: [2023-05-25 13:38:03,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +12: [2023-05-25 13:38:03,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +23: [2023-05-25 13:38:03,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +14: [2023-05-25 13:38:03,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +14: [2023-05-25 13:38:03,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:03,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:03,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:03,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:03,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:03,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +18: [2023-05-25 13:38:03,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:03,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:03,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:03,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:03,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:03,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +14: [2023-05-25 13:38:03,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:03,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +23: [2023-05-25 13:38:03,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +10: [2023-05-25 13:38:03,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:03,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. +18: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... +18: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... +10: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +10: [2023-05-25 13:38:03,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +16: [2023-05-25 13:38:03,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:03,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:03,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:03,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:03,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:03,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:03,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +20: [2023-05-25 13:38:03,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:03,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:03,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +12: [2023-05-25 13:38:03,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +12: [2023-05-25 13:38:03,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +10: [2023-05-25 13:38:03,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +20: [2023-05-25 13:38:03,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... +10: [2023-05-25 13:38:03,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +13: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_04_optim_states.pt... +13: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_04_optim_states.pt... +22: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:03,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:03,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:03,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:03,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +10: [2023-05-25 13:38:03,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. +13: [2023-05-25 13:38:03,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +13: [2023-05-25 13:38:03,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +13: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +13: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_01-model_states.pt. +11: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +19: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +11: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +19: [2023-05-25 13:38:03,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +11: [2023-05-25 13:38:03,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +20: [2023-05-25 13:38:03,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +11: [2023-05-25 13:38:03,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +11: [2023-05-25 13:38:03,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +11: [2023-05-25 13:38:03,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... +12: [2023-05-25 13:38:03,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +20: [2023-05-25 13:38:03,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... +13: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. +21: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. +17: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. +13: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. +16: [2023-05-25 13:38:03,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +17: [2023-05-25 13:38:03,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. +18: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:03,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +21: [2023-05-25 13:38:03,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. +18: [2023-05-25 13:38:03,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:03,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:03,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +22: [2023-05-25 13:38:03,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +22: [2023-05-25 13:38:03,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +14: [2023-05-25 13:38:03,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. +20: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_08_optim_states.pt... +20: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_08_optim_states.pt... +10: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_04_optim_states.pt... +10: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_04_optim_states.pt... + 0: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:03,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +12: [2023-05-25 13:38:03,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +14: [2023-05-25 13:38:03,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... + 5: [2023-05-25 13:38:03,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_10-model_02-model_states.pt. +23: [2023-05-25 13:38:03,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +14: [2023-05-25 13:38:03,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_04_optim_states.pt... +15: [2023-05-25 13:38:03,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:03,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:03,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_04_optim_states.pt... +19: [2023-05-25 13:38:03,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +19: [2023-05-25 13:38:03,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +12: [2023-05-25 13:38:03,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +23: [2023-05-25 13:38:03,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... +13: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +15: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... + 9: [2023-05-25 13:38:03,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +23: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... +14: [2023-05-25 13:38:03,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:03,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +12: [2023-05-25 13:38:03,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +21: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +10: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. +10: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +13: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +14: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... +21: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +12: [2023-05-25 13:38:03,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +17: [2023-05-25 13:38:03,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:03,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:03,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. +15: [2023-05-25 13:38:03,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +17: [2023-05-25 13:38:03,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +19: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_08_optim_states.pt... +19: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_08_optim_states.pt... +16: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... + 1: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. +23: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +10: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:03,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +12: [2023-05-25 13:38:03,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... +15: [2023-05-25 13:38:03,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... +13: [2023-05-25 13:38:03,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +10: [2023-05-25 13:38:03,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... +12: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:03,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... + 1: [2023-05-25 13:38:03,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +22: [2023-05-25 13:38:03,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. +19: [2023-05-25 13:38:03,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:03,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... + 1: [2023-05-25 13:38:03,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt... +15: [2023-05-25 13:38:03,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. +15: [2023-05-25 13:38:03,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. +11: [2023-05-25 13:38:03,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +19: [2023-05-25 13:38:03,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. +10: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_04_optim_states.pt... +11: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_04_optim_states.pt... +12: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. +12: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. +14: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. +14: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. +18: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_08_optim_states.pt... +18: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_08_optim_states.pt... +29: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_12_optim_states.pt. +29: [2023-05-25 13:38:03,089] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 236 +10: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_04_optim_states.pt... +12: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_04_optim_states.pt... +19: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. +10: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... +11: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. + 8: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_04_optim_states.pt... + 8: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_04_optim_states.pt... +11: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... +10: [2023-05-25 13:38:03,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... + 6: [2023-05-25 13:38:03,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. +23: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_08_optim_states.pt... +23: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_08_optim_states.pt... + 0: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... +11: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. +11: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +19: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_08_optim_states.pt... +16: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_08_optim_states.pt... +17: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +12: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +22: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_08_optim_states.pt... +22: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_08_optim_states.pt... + 5: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +14: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. +15: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +17: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +15: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +16: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +19: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +12: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_23-model_02-model_states.pt. +17: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +17: [2023-05-25 13:38:03,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +13: [2023-05-25 13:38:03,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:03,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +13: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... +16: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... + 0: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +16: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +22: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +22: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +13: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +17: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +13: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... + 5: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +22: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... + 5: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... + 1: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +11: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... +13: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... +13: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... +13: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... + 5: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... +17: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... + 9: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +27: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_12_optim_states.pt. + 1: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_00-model_states.pt. +27: [2023-05-25 13:38:03,120] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 220 +17: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +12: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +19: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +14: [2023-05-25 13:38:03,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt... +31: [2023-05-25 13:38:03,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_12_optim_states.pt. +31: [2023-05-25 13:38:03,123] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 248 +19: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +28: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_12_optim_states.pt. +28: [2023-05-25 13:38:03,124] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 224 + 5: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +13: [2023-05-25 13:38:03,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... +19: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +10: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +12: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt... + 6: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. +19: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... + 0: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +19: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +21: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. +21: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. +31: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_12_optim_states.pt. +31: [2023-05-25 13:38:03,130] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 252 +15: [2023-05-25 13:38:03,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. +19: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +15: [2023-05-25 13:38:03,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... +15: [2023-05-25 13:38:03,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +17: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_08_optim_states.pt... +17: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_08_optim_states.pt... + 4: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +15: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... +14: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... +14: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +19: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +19: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... +12: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... +11: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +21: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +11: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. + 9: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_04_optim_states.pt... + 9: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_04_optim_states.pt... + 0: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. +11: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... + 4: [2023-05-25 13:38:03,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +11: [2023-05-25 13:38:03,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. +26: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_12_optim_states.pt. +26: [2023-05-25 13:38:03,149] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 212 + 3: [2023-05-25 13:38:03,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... + 4: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +17: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... +17: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... + 9: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +29: [2023-05-25 13:38:03,157] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_12_optim_states.pt. +29: [2023-05-25 13:38:03,157] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 232 + 9: [2023-05-25 13:38:03,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt... +24: [2023-05-25 13:38:03,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_12_optim_states.pt. +24: [2023-05-25 13:38:03,160] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 192 + 9: [2023-05-25 13:38:03,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +23: [2023-05-25 13:38:03,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +23: [2023-05-25 13:38:03,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. +21: [2023-05-25 13:38:03,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:03,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:03,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt... + 0: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... +22: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. +22: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. +21: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. +22: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +21: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +21: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +20: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. +20: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +16: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. +18: [2023-05-25 13:38:03,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +18: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. +23: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. +23: [2023-05-25 13:38:03,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +23: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +21: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +23: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... +21: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... +21: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... +20: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... +20: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... +22: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +22: [2023-05-25 13:38:03,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:03,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:03,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:03,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,183] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +23: [2023-05-25 13:38:03,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... +18: [2023-05-25 13:38:03,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_03-model_states.pt. +23: [2023-05-25 13:38:03,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +24: [2023-05-25 13:38:03,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_12_optim_states.pt. + 4: [2023-05-25 13:38:03,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +24: [2023-05-25 13:38:03,188] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 196 + 4: [2023-05-25 13:38:03,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +28: [2023-05-25 13:38:03,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_12_optim_states.pt. + 2: [2023-05-25 13:38:03,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +28: [2023-05-25 13:38:03,189] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 228 + 2: [2023-05-25 13:38:03,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... +21: [2023-05-25 13:38:03,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +21: [2023-05-25 13:38:03,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. +21: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. +21: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +19: [2023-05-25 13:38:03,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +19: [2023-05-25 13:38:03,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +18: [2023-05-25 13:38:03,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... + 2: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +18: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. +18: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_03-model_states.pt. +18: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +16: [2023-05-25 13:38:03,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... + 2: [2023-05-25 13:38:03,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +20: [2023-05-25 13:38:03,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:03,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:03,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +16: [2023-05-25 13:38:03,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:03,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +20: [2023-05-25 13:38:03,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +20: [2023-05-25 13:38:03,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +21: [2023-05-25 13:38:03,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:03,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +21: [2023-05-25 13:38:03,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +16: [2023-05-25 13:38:03,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +22: [2023-05-25 13:38:03,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:03,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +22: [2023-05-25 13:38:03,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:03,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:03,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +16: [2023-05-25 13:38:03,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. +26: [2023-05-25 13:38:03,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_12_optim_states.pt. +16: [2023-05-25 13:38:03,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. +26: [2023-05-25 13:38:03,215] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 208 + 5: [2023-05-25 13:38:03,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +23: [2023-05-25 13:38:03,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:03,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +19: [2023-05-25 13:38:03,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:03,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +20: [2023-05-25 13:38:03,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. +23: [2023-05-25 13:38:03,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:03,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:03,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. +23: [2023-05-25 13:38:03,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... +22: [2023-05-25 13:38:03,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:03,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +23: [2023-05-25 13:38:03,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:03,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. +22: [2023-05-25 13:38:03,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... +21: [2023-05-25 13:38:03,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_08_optim_states.pt... +21: [2023-05-25 13:38:03,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_08_optim_states.pt... + 3: [2023-05-25 13:38:03,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. +18: [2023-05-25 13:38:03,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:03,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +18: [2023-05-25 13:38:03,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... + 6: [2023-05-25 13:38:03,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +16: [2023-05-25 13:38:03,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +20: [2023-05-25 13:38:03,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:03,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:03,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... +18: [2023-05-25 13:38:03,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:03,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +25: [2023-05-25 13:38:03,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_12_optim_states.pt. +25: [2023-05-25 13:38:03,234] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 204 +21: [2023-05-25 13:38:03,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +21: [2023-05-25 13:38:03,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +21: [2023-05-25 13:38:03,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... + 6: [2023-05-25 13:38:03,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt... +21: [2023-05-25 13:38:03,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +19: [2023-05-25 13:38:03,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,242] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... + 2: [2023-05-25 13:38:03,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... + 5: [2023-05-25 13:38:03,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +19: [2023-05-25 13:38:03,246] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... +19: [2023-05-25 13:38:03,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:03,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... +19: [2023-05-25 13:38:03,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... +17: [2023-05-25 13:38:03,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. +17: [2023-05-25 13:38:03,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_34-model_01-model_states.pt. +20: [2023-05-25 13:38:03,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... +18: [2023-05-25 13:38:03,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:03,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt... +20: [2023-05-25 13:38:03,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:03,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +20: [2023-05-25 13:38:03,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... +17: [2023-05-25 13:38:03,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:03,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:03,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... +16: [2023-05-25 13:38:03,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... +16: [2023-05-25 13:38:03,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... + 6: [2023-05-25 13:38:03,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... + 6: [2023-05-25 13:38:03,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt... +12: [2023-05-25 13:38:03,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. +12: [2023-05-25 13:38:03,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +17: [2023-05-25 13:38:03,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:03,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... +30: [2023-05-25 13:38:03,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_12_optim_states.pt. +30: [2023-05-25 13:38:03,294] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 240 +18: [2023-05-25 13:38:03,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. +18: [2023-05-25 13:38:03,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. +17: [2023-05-25 13:38:03,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:03,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt... +12: [2023-05-25 13:38:03,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_05_optim_states.pt... +12: [2023-05-25 13:38:03,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_05_optim_states.pt... + 4: [2023-05-25 13:38:03,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. +25: [2023-05-25 13:38:03,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_12_optim_states.pt. +25: [2023-05-25 13:38:03,302] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 200 + 8: [2023-05-25 13:38:03,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +14: [2023-05-25 13:38:03,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. +14: [2023-05-25 13:38:03,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. +20: [2023-05-25 13:38:03,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. +20: [2023-05-25 13:38:03,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. + 9: [2023-05-25 13:38:03,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. + 9: [2023-05-25 13:38:03,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. +15: [2023-05-25 13:38:03,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. +15: [2023-05-25 13:38:03,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. +15: [2023-05-25 13:38:03,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_05_optim_states.pt... + 8: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_05_optim_states.pt... + 3: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +15: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. +14: [2023-05-25 13:38:03,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_05_optim_states.pt... +14: [2023-05-25 13:38:03,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_05_optim_states.pt... + 3: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +18: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_10_optim_states.pt... +18: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_10_optim_states.pt... +10: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +10: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +20: [2023-05-25 13:38:03,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_10_optim_states.pt... +20: [2023-05-25 13:38:03,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_10_optim_states.pt... +11: [2023-05-25 13:38:03,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. + 9: [2023-05-25 13:38:03,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_05_optim_states.pt... + 9: [2023-05-25 13:38:03,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_05_optim_states.pt... + 3: [2023-05-25 13:38:03,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. +11: [2023-05-25 13:38:03,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... +17: [2023-05-25 13:38:03,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. +17: [2023-05-25 13:38:03,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. +10: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_05_optim_states.pt... +10: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_05_optim_states.pt... + 0: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. +11: [2023-05-25 13:38:03,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. +11: [2023-05-25 13:38:03,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. +11: [2023-05-25 13:38:03,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_05_optim_states.pt... + 7: [2023-05-25 13:38:03,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +15: [2023-05-25 13:38:03,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_06_optim_states.pt... +15: [2023-05-25 13:38:03,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_06_optim_states.pt... +15: [2023-05-25 13:38:03,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_05_optim_states.pt... +15: [2023-05-25 13:38:03,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_05_optim_states.pt... +10: [2023-05-25 13:38:03,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. +10: [2023-05-25 13:38:03,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... +11: [2023-05-25 13:38:03,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_05_optim_states.pt... +23: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +19: [2023-05-25 13:38:03,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. +19: [2023-05-25 13:38:03,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... +17: [2023-05-25 13:38:03,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_10_optim_states.pt... +17: [2023-05-25 13:38:03,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_10_optim_states.pt... + 6: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... + 6: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... +16: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. +23: [2023-05-25 13:38:03,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. +16: [2023-05-25 13:38:03,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. +13: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. +13: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_01-model_states.pt. +17: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. +17: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +13: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. +13: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. +12: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. +11: [2023-05-25 13:38:03,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_06_optim_states.pt... +11: [2023-05-25 13:38:03,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_06_optim_states.pt... +12: [2023-05-25 13:38:03,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. +19: [2023-05-25 13:38:03,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +19: [2023-05-25 13:38:03,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... +16: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_10_optim_states.pt... +16: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_10_optim_states.pt... +22: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. +22: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. +10: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_06_optim_states.pt... +10: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_06_optim_states.pt... + 5: [2023-05-25 13:38:03,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_11-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... +16: [2023-05-25 13:38:03,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. +21: [2023-05-25 13:38:03,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. +21: [2023-05-25 13:38:03,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. +21: [2023-05-25 13:38:03,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. +21: [2023-05-25 13:38:03,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_02-model_states.pt. +16: [2023-05-25 13:38:03,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... + 6: [2023-05-25 13:38:03,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. +17: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_11_optim_states.pt... +17: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_11_optim_states.pt... +13: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_05_optim_states.pt... +13: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_05_optim_states.pt... +23: [2023-05-25 13:38:03,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_10_optim_states.pt... +23: [2023-05-25 13:38:03,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_10_optim_states.pt... + 2: [2023-05-25 13:38:03,432] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... +14: [2023-05-25 13:38:03,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. +13: [2023-05-25 13:38:03,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_06_optim_states.pt... +13: [2023-05-25 13:38:03,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_06_optim_states.pt... +14: [2023-05-25 13:38:03,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. +19: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_11_optim_states.pt... +19: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_11_optim_states.pt... + 2: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... + 6: [2023-05-25 13:38:03,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +19: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_10_optim_states.pt... +19: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_10_optim_states.pt... + 1: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... +21: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_10_optim_states.pt... +21: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_10_optim_states.pt... +21: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_09_optim_states.pt... +21: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_09_optim_states.pt... +12: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_06_optim_states.pt... +12: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_06_optim_states.pt... + 0: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +18: [2023-05-25 13:38:03,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +12: [2023-05-25 13:38:03,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. +25: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_14_optim_states.pt. +12: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. +30: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_14_optim_states.pt. +25: [2023-05-25 13:38:03,464] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 202 +30: [2023-05-25 13:38:03,464] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 242 + 5: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... +16: [2023-05-25 13:38:03,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_11_optim_states.pt... +16: [2023-05-25 13:38:03,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_11_optim_states.pt... +18: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +23: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. +13: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. +13: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. +23: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +22: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_10_optim_states.pt... +22: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_10_optim_states.pt... + 9: [2023-05-25 13:38:03,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. + 9: [2023-05-25 13:38:03,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +21: [2023-05-25 13:38:03,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. +21: [2023-05-25 13:38:03,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. +14: [2023-05-25 13:38:03,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_06_optim_states.pt... +14: [2023-05-25 13:38:03,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_06_optim_states.pt... + 6: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +22: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. +22: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +14: [2023-05-25 13:38:03,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. +14: [2023-05-25 13:38:03,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... +12: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_07_optim_states.pt... +12: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_07_optim_states.pt... +25: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_15_optim_states.pt. +30: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_12_optim_states.pt. +30: [2023-05-25 13:38:03,486] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 244 + 5: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. +23: [2023-05-25 13:38:03,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. +23: [2023-05-25 13:38:03,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. +25: [2023-05-25 13:38:03,485] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 207 + 9: [2023-05-25 13:38:03,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_06_optim_states.pt... + 9: [2023-05-25 13:38:03,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_06_optim_states.pt... + 8: [2023-05-25 13:38:03,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_06_optim_states.pt... + 8: [2023-05-25 13:38:03,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_06_optim_states.pt... + 5: [2023-05-25 13:38:03,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +23: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_09_optim_states.pt... +23: [2023-05-25 13:38:03,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_09_optim_states.pt... + 1: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. +10: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. +10: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. +20: [2023-05-25 13:38:03,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. +20: [2023-05-25 13:38:03,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. + 8: [2023-05-25 13:38:03,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_07_optim_states.pt... + 8: [2023-05-25 13:38:03,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_07_optim_states.pt... + 2: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. +21: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_11_optim_states.pt... +21: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_11_optim_states.pt... +28: [2023-05-25 13:38:03,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_13_optim_states.pt. +28: [2023-05-25 13:38:03,505] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 229 +26: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_13_optim_states.pt. +26: [2023-05-25 13:38:03,506] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 209 +31: [2023-05-25 13:38:03,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_15_optim_states.pt. +31: [2023-05-25 13:38:03,507] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 255 +18: [2023-05-25 13:38:03,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... + 5: [2023-05-25 13:38:03,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +18: [2023-05-25 13:38:03,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_07_optim_states.pt... + 6: [2023-05-25 13:38:03,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +15: [2023-05-25 13:38:03,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. +15: [2023-05-25 13:38:03,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_07_optim_states.pt... + 2: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +14: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_07_optim_states.pt... +14: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_07_optim_states.pt... + 2: [2023-05-25 13:38:03,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +29: [2023-05-25 13:38:03,517] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 236 + 2: [2023-05-25 13:38:03,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +22: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_09_optim_states.pt... +22: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_09_optim_states.pt... + 5: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_01-model_states.pt. +26: [2023-05-25 13:38:03,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_13_optim_states.pt. +26: [2023-05-25 13:38:03,522] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 213 + 0: [2023-05-25 13:38:03,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +22: [2023-05-25 13:38:03,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. +22: [2023-05-25 13:38:03,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +10: [2023-05-25 13:38:03,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_07_optim_states.pt... +10: [2023-05-25 13:38:03,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_07_optim_states.pt... + 6: [2023-05-25 13:38:03,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +29: [2023-05-25 13:38:03,524] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 232 +20: [2023-05-25 13:38:03,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_11_optim_states.pt... +20: [2023-05-25 13:38:03,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_11_optim_states.pt... +18: [2023-05-25 13:38:03,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_09_optim_states.pt... +18: [2023-05-25 13:38:03,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_09_optim_states.pt... + 0: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +18: [2023-05-25 13:38:03,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_11_optim_states.pt... +18: [2023-05-25 13:38:03,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_11_optim_states.pt... + 6: [2023-05-25 13:38:03,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... + 6: [2023-05-25 13:38:03,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... + 3: [2023-05-25 13:38:03,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +30: [2023-05-25 13:38:03,541] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 240 + 0: [2023-05-25 13:38:03,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +22: [2023-05-25 13:38:03,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_11_optim_states.pt... +22: [2023-05-25 13:38:03,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_11_optim_states.pt... +30: [2023-05-25 13:38:03,544] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 244 + 4: [2023-05-25 13:38:03,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 4: [2023-05-25 13:38:03,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 5: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 5: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... +15: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_07_optim_states.pt... +15: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_07_optim_states.pt... +23: [2023-05-25 13:38:03,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_11_optim_states.pt... +23: [2023-05-25 13:38:03,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_11_optim_states.pt... +29: [2023-05-25 13:38:03,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_15_optim_states.pt. +29: [2023-05-25 13:38:03,557] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 239 + 6: [2023-05-25 13:38:03,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +30: [2023-05-25 13:38:03,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_13_optim_states.pt. +30: [2023-05-25 13:38:03,560] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 241 +20: [2023-05-25 13:38:03,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. +20: [2023-05-25 13:38:03,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 6: [2023-05-25 13:38:03,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... +29: [2023-05-25 13:38:03,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_13_optim_states.pt. +29: [2023-05-25 13:38:03,561] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 233 + 0: [2023-05-25 13:38:03,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +25: [2023-05-25 13:38:03,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_14_optim_states.pt. +25: [2023-05-25 13:38:03,566] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 206 + 4: [2023-05-25 13:38:03,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. +24: [2023-05-25 13:38:03,570] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 196 +28: [2023-05-25 13:38:03,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_15_optim_states.pt. +28: [2023-05-25 13:38:03,578] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 231 + 0: > overriding learning rate value to 0.0002 + 0: > overriding minimum learning rate value to 2e-05 + 0: > overriding warmup iterations value to 0 + 0: > overriding total number of iterations value to 1 + 0: > overriding decay style value to cosine + 0: [2023-05-25 13:38:03,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +29: [2023-05-25 13:38:03,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_14_optim_states.pt. +29: [2023-05-25 13:38:03,580] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 238 + 4: [2023-05-25 13:38:03,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +13: [2023-05-25 13:38:03,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_07_optim_states.pt... +13: [2023-05-25 13:38:03,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_07_optim_states.pt... +19: [2023-05-25 13:38:03,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. +11: [2023-05-25 13:38:03,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. +19: [2023-05-25 13:38:03,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. +11: [2023-05-25 13:38:03,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_24-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. +24: [2023-05-25 13:38:03,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_14_optim_states.pt. +24: [2023-05-25 13:38:03,594] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 194 +24: [2023-05-25 13:38:03,594] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 192 +24: [2023-05-25 13:38:03,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_15_optim_states.pt. +24: [2023-05-25 13:38:03,598] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 195 +31: [2023-05-25 13:38:03,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_13_optim_states.pt. +31: [2023-05-25 13:38:03,600] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 249 + 4: [2023-05-25 13:38:03,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +16: [2023-05-25 13:38:03,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. +16: [2023-05-25 13:38:03,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... +25: [2023-05-25 13:38:03,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_13_optim_states.pt. +25: [2023-05-25 13:38:03,606] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 201 +29: [2023-05-25 13:38:03,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_14_optim_states.pt. +29: [2023-05-25 13:38:03,608] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 234 + 0: [2023-05-25 13:38:03,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. +27: [2023-05-25 13:38:03,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_15_optim_states.pt. +27: [2023-05-25 13:38:03,610] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 219 +28: [2023-05-25 13:38:03,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_14_optim_states.pt. +28: [2023-05-25 13:38:03,610] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 226 +11: [2023-05-25 13:38:03,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_07_optim_states.pt... +11: [2023-05-25 13:38:03,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_07_optim_states.pt... +27: [2023-05-25 13:38:03,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_14_optim_states.pt. +27: [2023-05-25 13:38:03,612] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 222 +30: [2023-05-25 13:38:03,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_14_optim_states.pt. +30: [2023-05-25 13:38:03,613] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 246 + 4: [2023-05-25 13:38:03,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... + 4: [2023-05-25 13:38:03,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... + 4: [2023-05-25 13:38:03,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +28: [2023-05-25 13:38:03,614] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 224 +28: [2023-05-25 13:38:03,615] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 228 + 4: [2023-05-25 13:38:03,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... +31: [2023-05-25 13:38:03,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_14_optim_states.pt. +31: [2023-05-25 13:38:03,616] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 250 + 3: [2023-05-25 13:38:03,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. +30: [2023-05-25 13:38:03,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_15_optim_states.pt. +30: [2023-05-25 13:38:03,619] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 243 + 1: [2023-05-25 13:38:03,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +29: [2023-05-25 13:38:03,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_13_optim_states.pt. +29: [2023-05-25 13:38:03,624] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 237 +19: [2023-05-25 13:38:03,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_09_optim_states.pt... +19: [2023-05-25 13:38:03,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_09_optim_states.pt... +28: [2023-05-25 13:38:03,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_14_optim_states.pt. +28: [2023-05-25 13:38:03,629] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 230 +24: [2023-05-25 13:38:03,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_13_optim_states.pt. +24: [2023-05-25 13:38:03,629] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 193 +26: [2023-05-25 13:38:03,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_15_optim_states.pt. +26: [2023-05-25 13:38:03,632] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 215 + 1: [2023-05-25 13:38:03,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +25: [2023-05-25 13:38:03,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_15_optim_states.pt. +25: [2023-05-25 13:38:03,647] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 203 + 2: [2023-05-25 13:38:03,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +24: [2023-05-25 13:38:03,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_14_optim_states.pt. +24: [2023-05-25 13:38:03,649] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 198 + 6: [2023-05-25 13:38:03,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. +26: [2023-05-25 13:38:03,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_14_optim_states.pt. +26: [2023-05-25 13:38:03,652] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 210 +20: [2023-05-25 13:38:03,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_09_optim_states.pt... +20: [2023-05-25 13:38:03,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_09_optim_states.pt... + 0: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 0: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... +28: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_15_optim_states.pt. + 1: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... +28: [2023-05-25 13:38:03,665] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 227 + 1: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 6: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 1: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +31: [2023-05-25 13:38:03,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_15_optim_states.pt. +31: [2023-05-25 13:38:03,668] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 251 + 2: [2023-05-25 13:38:03,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... +16: [2023-05-25 13:38:03,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_09_optim_states.pt... +16: [2023-05-25 13:38:03,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_09_optim_states.pt... + 2: [2023-05-25 13:38:03,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... +27: [2023-05-25 13:38:03,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_15_optim_states.pt. +27: [2023-05-25 13:38:03,683] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 223 +24: [2023-05-25 13:38:03,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_13_optim_states.pt. +24: [2023-05-25 13:38:03,687] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 197 + 6: [2023-05-25 13:38:03,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +27: [2023-05-25 13:38:03,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_13_optim_states.pt. +27: [2023-05-25 13:38:03,696] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 217 + 1: [2023-05-25 13:38:03,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +27: [2023-05-25 13:38:03,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_14_optim_states.pt. +27: [2023-05-25 13:38:03,700] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 218 + 3: [2023-05-25 13:38:03,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. +17: [2023-05-25 13:38:03,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. +17: [2023-05-25 13:38:03,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_35-model_01-model_states.pt. +25: [2023-05-25 13:38:03,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_13_optim_states.pt. +25: [2023-05-25 13:38:03,709] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 205 + 5: [2023-05-25 13:38:03,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_12-model_02-model_states.pt. +27: [2023-05-25 13:38:03,723] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 220 + 6: [2023-05-25 13:38:03,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. +26: [2023-05-25 13:38:03,726] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 208 + 6: [2023-05-25 13:38:03,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. +25: [2023-05-25 13:38:03,727] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 204 +31: [2023-05-25 13:38:03,728] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 248 +25: [2023-05-25 13:38:03,729] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 200 +27: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_13_optim_states.pt. +27: [2023-05-25 13:38:03,731] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 221 +27: [2023-05-25 13:38:03,731] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 216 +26: [2023-05-25 13:38:03,732] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 212 +31: [2023-05-25 13:38:03,732] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 252 + 3: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... + 3: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... + 2: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... + 2: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... + 1: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... + 1: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... + 5: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt... +28: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_13_optim_states.pt. +28: [2023-05-25 13:38:03,749] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 225 + 7: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... + 7: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +24: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_15_optim_states.pt. +24: [2023-05-25 13:38:03,754] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 199 +30: [2023-05-25 13:38:03,756] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 243 + 5: [2023-05-25 13:38:03,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 5: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt... + 5: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. +26: [2023-05-25 13:38:03,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_14_optim_states.pt. +26: [2023-05-25 13:38:03,784] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 214 + 6: [2023-05-25 13:38:03,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_03_optim_states.pt... + 6: [2023-05-25 13:38:03,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_03_optim_states.pt... +26: [2023-05-25 13:38:03,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_15_optim_states.pt. +26: [2023-05-25 13:38:03,794] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 211 +30: [2023-05-25 13:38:03,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_15_optim_states.pt. +30: [2023-05-25 13:38:03,794] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 247 +31: [2023-05-25 13:38:03,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_13_optim_states.pt. +31: [2023-05-25 13:38:03,793] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 253 +31: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_14_optim_states.pt. +29: [2023-05-25 13:38:03,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_15_optim_states.pt. +29: [2023-05-25 13:38:03,800] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 235 +31: [2023-05-25 13:38:03,797] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 254 +17: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_09_optim_states.pt... +17: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_09_optim_states.pt... +30: [2023-05-25 13:38:03,808] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 247 +20: [2023-05-25 13:38:03,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_08_optim_states.pt. +20: [2023-05-25 13:38:03,809] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 164 + 5: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_03_optim_states.pt... + 5: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_03_optim_states.pt... + 4: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. +29: [2023-05-25 13:38:03,812] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 235 +29: [2023-05-25 13:38:03,814] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 239 + 0: [2023-05-25 13:38:03,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. +20: [2023-05-25 13:38:03,822] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 164 + 0: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_01_optim_states.pt... + 4: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_01_optim_states.pt... +28: [2023-05-25 13:38:03,841] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 225 +28: [2023-05-25 13:38:03,842] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 229 +24: [2023-05-25 13:38:03,851] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 195 + 0: [2023-05-25 13:38:03,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_03_optim_states.pt... + 0: [2023-05-25 13:38:03,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_03_optim_states.pt... + 7: [2023-05-25 13:38:03,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. +24: [2023-05-25 13:38:03,858] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 199 +28: [2023-05-25 13:38:03,860] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 227 +28: [2023-05-25 13:38:03,860] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 231 +24: [2023-05-25 13:38:03,863] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 193 +24: [2023-05-25 13:38:03,864] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 197 + 0: [2023-05-25 13:38:03,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt... + 0: [2023-05-25 13:38:03,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_01_optim_states.pt... + 2: [2023-05-25 13:38:03,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. +31: [2023-05-25 13:38:03,887] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 251 +19: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_08_optim_states.pt. +19: [2023-05-25 13:38:03,888] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 152 +31: [2023-05-25 13:38:03,888] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 255 +30: [2023-05-25 13:38:03,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_13_optim_states.pt. +30: [2023-05-25 13:38:03,889] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 245 + 1: [2023-05-25 13:38:03,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. +19: [2023-05-25 13:38:03,905] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 152 + 6: [2023-05-25 13:38:03,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. +18: [2023-05-25 13:38:03,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_08_optim_states.pt. +18: [2023-05-25 13:38:03,909] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 144 +26: [2023-05-25 13:38:03,915] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 215 +26: [2023-05-25 13:38:03,918] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 211 +18: [2023-05-25 13:38:03,921] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 144 + 5: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. +19: [2023-05-25 13:38:03,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_08_optim_states.pt. +19: [2023-05-25 13:38:03,923] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 156 +25: [2023-05-25 13:38:03,930] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 203 + 1: [2023-05-25 13:38:03,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_03_optim_states.pt... + 7: [2023-05-25 13:38:03,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_03_optim_states.pt... +25: [2023-05-25 13:38:03,937] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 207 +30: [2023-05-25 13:38:03,938] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 241 + 1: [2023-05-25 13:38:03,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_03_optim_states.pt... + 1: [2023-05-25 13:38:03,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_03_optim_states.pt... +19: [2023-05-25 13:38:03,935] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 156 + 2: [2023-05-25 13:38:03,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_03_optim_states.pt... + 2: [2023-05-25 13:38:03,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_03_optim_states.pt... +30: [2023-05-25 13:38:03,940] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 245 +14: [2023-05-25 13:38:03,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_04_optim_states.pt. +14: [2023-05-25 13:38:03,943] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 112 + 1: [2023-05-25 13:38:03,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. +14: [2023-05-25 13:38:03,956] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 112 + 7: [2023-05-25 13:38:03,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. +29: [2023-05-25 13:38:03,958] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 233 +29: [2023-05-25 13:38:03,961] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 237 +26: [2023-05-25 13:38:03,961] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 214 +26: [2023-05-25 13:38:03,962] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 210 +25: [2023-05-25 13:38:03,962] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 202 + 3: [2023-05-25 13:38:03,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_02_optim_states.pt... + 2: [2023-05-25 13:38:03,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_02_optim_states.pt... +25: [2023-05-25 13:38:03,965] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 206 + 3: [2023-05-25 13:38:03,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. +18: [2023-05-25 13:38:03,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_08_optim_states.pt. +18: [2023-05-25 13:38:03,971] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 148 + 1: [2023-05-25 13:38:03,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_02_optim_states.pt... + 1: [2023-05-25 13:38:03,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_02_optim_states.pt... + 2: [2023-05-25 13:38:03,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_01-model_states.pt. +27: [2023-05-25 13:38:03,978] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 223 +27: [2023-05-25 13:38:03,981] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 219 +25: [2023-05-25 13:38:03,981] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 201 +30: [2023-05-25 13:38:03,982] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 242 +25: [2023-05-25 13:38:03,982] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 205 +18: [2023-05-25 13:38:03,983] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 148 +24: [2023-05-25 13:38:03,985] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 198 +12: [2023-05-25 13:38:03,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_04_optim_states.pt. +30: [2023-05-25 13:38:03,986] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 246 +12: [2023-05-25 13:38:03,986] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 96 + 6: [2023-05-25 13:38:03,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_02_optim_states.pt... + 6: [2023-05-25 13:38:03,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_02_optim_states.pt... +31: [2023-05-25 13:38:03,989] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 249 +31: [2023-05-25 13:38:03,989] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 253 + 3: [2023-05-25 13:38:03,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_02_optim_states.pt... + 3: [2023-05-25 13:38:03,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_02_optim_states.pt... +24: [2023-05-25 13:38:03,990] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 194 +14: [2023-05-25 13:38:03,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_04_optim_states.pt. +14: [2023-05-25 13:38:03,992] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 116 + 3: [2023-05-25 13:38:03,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_03_optim_states.pt... + 4: [2023-05-25 13:38:03,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_03_optim_states.pt... + 3: [2023-05-25 13:38:03,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_03_optim_states.pt... + 4: [2023-05-25 13:38:03,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_03_optim_states.pt... + 4: [2023-05-25 13:38:03,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. +27: [2023-05-25 13:38:03,997] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 222 + 6: [2023-05-25 13:38:03,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_01_optim_states.pt... + 6: [2023-05-25 13:38:03,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_01_optim_states.pt... +12: [2023-05-25 13:38:03,999] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 96 +22: [2023-05-25 13:38:04,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_08_optim_states.pt. +22: [2023-05-25 13:38:04,001] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 176 +27: [2023-05-25 13:38:04,004] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 218 +27: [2023-05-25 13:38:04,005] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 217 +14: [2023-05-25 13:38:04,005] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 116 + 5: [2023-05-25 13:38:04,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt... + 5: [2023-05-25 13:38:04,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt... +26: [2023-05-25 13:38:04,008] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 213 +27: [2023-05-25 13:38:04,008] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 221 +26: [2023-05-25 13:38:04,008] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 209 + 3: [2023-05-25 13:38:04,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_01_optim_states.pt... + 3: [2023-05-25 13:38:04,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_01_optim_states.pt... +22: [2023-05-25 13:38:04,013] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 176 + 8: [2023-05-25 13:38:04,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_04_optim_states.pt. + 8: [2023-05-25 13:38:04,016] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 68 +16: [2023-05-25 13:38:04,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_08_optim_states.pt. +16: [2023-05-25 13:38:04,025] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 128 +28: [2023-05-25 13:38:04,026] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 230 +28: [2023-05-25 13:38:04,027] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 226 + 7: [2023-05-25 13:38:04,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 7: [2023-05-25 13:38:04,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 8: [2023-05-25 13:38:04,030] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 68 + 7: [2023-05-25 13:38:04,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_01_optim_states.pt... + 7: [2023-05-25 13:38:04,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_01_optim_states.pt... + 8: [2023-05-25 13:38:04,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_04_optim_states.pt. + 8: [2023-05-25 13:38:04,039] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 64 +16: [2023-05-25 13:38:04,042] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 128 + 0: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_02_optim_states.pt... + 0: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_02_optim_states.pt... + 8: [2023-05-25 13:38:04,051] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 64 + 4: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_02_optim_states.pt... + 4: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_02_optim_states.pt... +11: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_04_optim_states.pt. + 7: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_02_optim_states.pt... +11: [2023-05-25 13:38:04,052] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 92 + 7: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_02_optim_states.pt... +31: [2023-05-25 13:38:04,057] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 250 +31: [2023-05-25 13:38:04,061] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 254 +11: [2023-05-25 13:38:04,065] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 92 +16: [2023-05-25 13:38:04,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_08_optim_states.pt. +16: [2023-05-25 13:38:04,073] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 132 +20: [2023-05-25 13:38:04,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_08_optim_states.pt. +20: [2023-05-25 13:38:04,079] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 160 +16: [2023-05-25 13:38:04,085] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 132 +20: [2023-05-25 13:38:04,094] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 160 +10: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_04_optim_states.pt. +10: [2023-05-25 13:38:04,094] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 84 +22: [2023-05-25 13:38:04,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_08_optim_states.pt. +22: [2023-05-25 13:38:04,099] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 180 +13: [2023-05-25 13:38:04,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_04_optim_states.pt. +13: [2023-05-25 13:38:04,102] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 108 +17: [2023-05-25 13:38:04,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_08_optim_states.pt. +17: [2023-05-25 13:38:04,106] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 136 +10: [2023-05-25 13:38:04,106] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 84 + 2: [2023-05-25 13:38:04,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_01_optim_states.pt... + 2: [2023-05-25 13:38:04,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_01_optim_states.pt... +22: [2023-05-25 13:38:04,112] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 180 +13: [2023-05-25 13:38:04,118] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 108 +17: [2023-05-25 13:38:04,118] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 136 + 1: [2023-05-25 13:38:04,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_01_optim_states.pt... + 1: [2023-05-25 13:38:04,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_01_optim_states.pt... + 5: [2023-05-25 13:38:04,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. + 5: [2023-05-25 13:38:04,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/layer_13-model_02-model_states.pt. +15: [2023-05-25 13:38:04,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_04_optim_states.pt. +15: [2023-05-25 13:38:04,143] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 120 +15: [2023-05-25 13:38:04,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_04_optim_states.pt. +15: [2023-05-25 13:38:04,144] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 124 +23: [2023-05-25 13:38:04,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_08_optim_states.pt. +23: [2023-05-25 13:38:04,147] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 188 +13: [2023-05-25 13:38:04,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_04_optim_states.pt. +13: [2023-05-25 13:38:04,153] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 104 +15: [2023-05-25 13:38:04,157] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 124 +15: [2023-05-25 13:38:04,159] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 120 +23: [2023-05-25 13:38:04,161] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 188 +10: [2023-05-25 13:38:04,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_04_optim_states.pt. +10: [2023-05-25 13:38:04,165] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 80 +13: [2023-05-25 13:38:04,165] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 104 + 5: [2023-05-25 13:38:04,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_02_optim_states.pt... + 5: [2023-05-25 13:38:04,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_02_optim_states.pt... +10: [2023-05-25 13:38:04,176] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 80 +29: [2023-05-25 13:38:04,176] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 234 +29: [2023-05-25 13:38:04,177] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 238 +17: [2023-05-25 13:38:04,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_08_optim_states.pt. +17: [2023-05-25 13:38:04,214] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 140 +12: [2023-05-25 13:38:04,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_04_optim_states.pt. +12: [2023-05-25 13:38:04,224] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 100 +11: [2023-05-25 13:38:04,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_04_optim_states.pt. +11: [2023-05-25 13:38:04,227] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 88 +17: [2023-05-25 13:38:04,227] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 140 +11: [2023-05-25 13:38:04,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_05_optim_states.pt. +11: [2023-05-25 13:38:04,229] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 93 +23: [2023-05-25 13:38:04,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_08_optim_states.pt. +23: [2023-05-25 13:38:04,235] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 184 +23: [2023-05-25 13:38:04,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_10_optim_states.pt. +23: [2023-05-25 13:38:04,236] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 190 +15: [2023-05-25 13:38:04,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_05_optim_states.pt. +15: [2023-05-25 13:38:04,236] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 121 +12: [2023-05-25 13:38:04,237] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 100 +15: [2023-05-25 13:38:04,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_06_optim_states.pt. +15: [2023-05-25 13:38:04,240] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 126 +11: [2023-05-25 13:38:04,246] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 88 +11: [2023-05-25 13:38:04,246] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 93 +23: [2023-05-25 13:38:04,248] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 184 +23: [2023-05-25 13:38:04,250] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 190 +15: [2023-05-25 13:38:04,251] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 121 +16: [2023-05-25 13:38:04,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_10_optim_states.pt. +16: [2023-05-25 13:38:04,254] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 134 +15: [2023-05-25 13:38:04,255] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 126 +16: [2023-05-25 13:38:04,267] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 134 +10: [2023-05-25 13:38:04,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_06_optim_states.pt. +10: [2023-05-25 13:38:04,268] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 82 +21: [2023-05-25 13:38:04,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_10_optim_states.pt. +21: [2023-05-25 13:38:04,274] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 174 +10: [2023-05-25 13:38:04,279] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 82 +22: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_09_optim_states.pt. +22: [2023-05-25 13:38:04,282] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 177 +21: [2023-05-25 13:38:04,293] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 174 +22: [2023-05-25 13:38:04,294] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 177 +12: [2023-05-25 13:38:04,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_07_optim_states.pt. +12: [2023-05-25 13:38:04,297] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 103 +11: [2023-05-25 13:38:04,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_05_optim_states.pt. +11: [2023-05-25 13:38:04,306] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 89 +12: [2023-05-25 13:38:04,311] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 103 +16: [2023-05-25 13:38:04,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_11_optim_states.pt. +16: [2023-05-25 13:38:04,311] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 131 +13: [2023-05-25 13:38:04,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_06_optim_states.pt. +13: [2023-05-25 13:38:04,318] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 106 +11: [2023-05-25 13:38:04,320] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 89 +16: [2023-05-25 13:38:04,326] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 131 +21: [2023-05-25 13:38:04,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_10_optim_states.pt. +21: [2023-05-25 13:38:04,327] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 170 +20: [2023-05-25 13:38:04,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_11_optim_states.pt. +20: [2023-05-25 13:38:04,330] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 163 +13: [2023-05-25 13:38:04,334] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 106 +12: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_06_optim_states.pt. +12: [2023-05-25 13:38:04,336] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 98 +19: [2023-05-25 13:38:04,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_11_optim_states.pt. +19: [2023-05-25 13:38:04,339] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 159 +21: [2023-05-25 13:38:04,339] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 170 +21: [2023-05-25 13:38:04,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_08_optim_states.pt. +21: [2023-05-25 13:38:04,340] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 168 +20: [2023-05-25 13:38:04,342] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 163 +14: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_05_optim_states.pt. +14: [2023-05-25 13:38:04,343] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 117 +15: [2023-05-25 13:38:04,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_05_optim_states.pt. +15: [2023-05-25 13:38:04,345] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 125 +12: [2023-05-25 13:38:04,350] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 98 +19: [2023-05-25 13:38:04,351] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 159 +19: [2023-05-25 13:38:04,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_10_optim_states.pt. +19: [2023-05-25 13:38:04,353] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 154 +21: [2023-05-25 13:38:04,353] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 168 +14: [2023-05-25 13:38:04,356] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 117 +15: [2023-05-25 13:38:04,360] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 125 +14: [2023-05-25 13:38:04,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_06_optim_states.pt. +14: [2023-05-25 13:38:04,364] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 118 +19: [2023-05-25 13:38:04,365] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 154 +17: [2023-05-25 13:38:04,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_10_optim_states.pt. +17: [2023-05-25 13:38:04,368] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 138 +13: [2023-05-25 13:38:04,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_05_optim_states.pt. +13: [2023-05-25 13:38:04,372] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 105 +17: [2023-05-25 13:38:04,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_11_optim_states.pt. +17: [2023-05-25 13:38:04,373] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 139 +10: [2023-05-25 13:38:04,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_05_optim_states.pt. +10: [2023-05-25 13:38:04,376] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 85 +17: [2023-05-25 13:38:04,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_10_optim_states.pt. +17: [2023-05-25 13:38:04,377] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 142 +14: [2023-05-25 13:38:04,380] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 118 +10: [2023-05-25 13:38:04,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_05_optim_states.pt. +10: [2023-05-25 13:38:04,380] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 81 +17: [2023-05-25 13:38:04,382] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 138 +17: [2023-05-25 13:38:04,386] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 139 +13: [2023-05-25 13:38:04,388] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 105 +10: [2023-05-25 13:38:04,389] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 85 + 9: [2023-05-25 13:38:04,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_05_optim_states.pt. + 9: [2023-05-25 13:38:04,389] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 77 +23: [2023-05-25 13:38:04,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_09_optim_states.pt. +23: [2023-05-25 13:38:04,390] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 185 +17: [2023-05-25 13:38:04,392] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 142 +14: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_05_optim_states.pt. +14: [2023-05-25 13:38:04,394] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 113 +10: [2023-05-25 13:38:04,395] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 81 + 6: [2023-05-25 13:38:04,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. + 6: [2023-05-25 13:38:04,396] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 52 +21: [2023-05-25 13:38:04,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_09_optim_states.pt. +21: [2023-05-25 13:38:04,396] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 169 + 9: [2023-05-25 13:38:04,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_07_optim_states.pt. + 9: [2023-05-25 13:38:04,399] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 75 +23: [2023-05-25 13:38:04,403] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 185 + 9: [2023-05-25 13:38:04,404] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 77 + 8: [2023-05-25 13:38:04,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_05_optim_states.pt. + 8: [2023-05-25 13:38:04,404] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 69 +14: [2023-05-25 13:38:04,406] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 113 +21: [2023-05-25 13:38:04,410] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 169 +19: [2023-05-25 13:38:04,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_11_optim_states.pt. +19: [2023-05-25 13:38:04,412] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 155 + 6: [2023-05-25 13:38:04,412] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 52 + 9: [2023-05-25 13:38:04,414] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 75 + 8: [2023-05-25 13:38:04,417] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 69 +19: [2023-05-25 13:38:04,424] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 155 +20: [2023-05-25 13:38:04,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_10_optim_states.pt. +20: [2023-05-25 13:38:04,430] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 162 +20: [2023-05-25 13:38:04,442] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 162 +21: [2023-05-25 13:38:04,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_08_optim_states.pt. +21: [2023-05-25 13:38:04,449] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 172 +10: [2023-05-25 13:38:04,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_07_optim_states.pt. +10: [2023-05-25 13:38:04,450] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 87 +12: [2023-05-25 13:38:04,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_05_optim_states.pt. +12: [2023-05-25 13:38:04,453] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 101 +10: [2023-05-25 13:38:04,463] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 87 +21: [2023-05-25 13:38:04,464] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 172 +11: [2023-05-25 13:38:04,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_06_optim_states.pt. +11: [2023-05-25 13:38:04,464] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 90 +12: [2023-05-25 13:38:04,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_07_optim_states.pt. +12: [2023-05-25 13:38:04,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_06_optim_states.pt. +12: [2023-05-25 13:38:04,468] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 99 +12: [2023-05-25 13:38:04,468] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 102 +12: [2023-05-25 13:38:04,469] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 101 +18: [2023-05-25 13:38:04,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_10_optim_states.pt. +18: [2023-05-25 13:38:04,471] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 146 +19: [2023-05-25 13:38:04,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_10_optim_states.pt. +19: [2023-05-25 13:38:04,471] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 158 + 9: [2023-05-25 13:38:04,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_04_optim_states.pt. + 9: [2023-05-25 13:38:04,473] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 76 + 9: [2023-05-25 13:38:04,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_07_optim_states.pt. + 9: [2023-05-25 13:38:04,474] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 79 +11: [2023-05-25 13:38:04,478] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 90 +12: [2023-05-25 13:38:04,483] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 99 +12: [2023-05-25 13:38:04,483] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 102 +18: [2023-05-25 13:38:04,483] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 146 + 8: [2023-05-25 13:38:04,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_07_optim_states.pt. + 8: [2023-05-25 13:38:04,482] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 71 + 8: [2023-05-25 13:38:04,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_06_optim_states.pt. + 8: [2023-05-25 13:38:04,482] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 70 +19: [2023-05-25 13:38:04,486] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 158 + 9: [2023-05-25 13:38:04,488] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 76 +20: [2023-05-25 13:38:04,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_11_optim_states.pt. +20: [2023-05-25 13:38:04,489] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 167 + 9: [2023-05-25 13:38:04,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_06_optim_states.pt. + 9: [2023-05-25 13:38:04,489] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 74 + 9: [2023-05-25 13:38:04,491] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 79 + 7: [2023-05-25 13:38:04,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. + 7: [2023-05-25 13:38:04,492] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 56 + 8: [2023-05-25 13:38:04,495] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 71 + 8: [2023-05-25 13:38:04,496] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 70 +18: [2023-05-25 13:38:04,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_09_optim_states.pt. +18: [2023-05-25 13:38:04,499] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 145 + 8: [2023-05-25 13:38:04,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_05_optim_states.pt. + 8: [2023-05-25 13:38:04,502] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 65 + 9: [2023-05-25 13:38:04,502] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 74 +20: [2023-05-25 13:38:04,502] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 167 +23: [2023-05-25 13:38:04,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_09_optim_states.pt. +23: [2023-05-25 13:38:04,505] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 189 + 7: [2023-05-25 13:38:04,506] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 56 +20: [2023-05-25 13:38:04,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_10_optim_states.pt. +20: [2023-05-25 13:38:04,506] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 166 +11: [2023-05-25 13:38:04,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_07_optim_states.pt. +11: [2023-05-25 13:38:04,509] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 95 +18: [2023-05-25 13:38:04,511] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 145 + 8: [2023-05-25 13:38:04,516] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 65 +15: [2023-05-25 13:38:04,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_06_optim_states.pt. +15: [2023-05-25 13:38:04,518] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 122 +20: [2023-05-25 13:38:04,519] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 166 +23: [2023-05-25 13:38:04,520] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 189 +11: [2023-05-25 13:38:04,522] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 95 +17: [2023-05-25 13:38:04,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_11_optim_states.pt. +17: [2023-05-25 13:38:04,525] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 143 +20: [2023-05-25 13:38:04,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_09_optim_states.pt. +20: [2023-05-25 13:38:04,529] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 161 +21: [2023-05-25 13:38:04,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_11_optim_states.pt. +21: [2023-05-25 13:38:04,530] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 171 +15: [2023-05-25 13:38:04,533] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 122 +18: [2023-05-25 13:38:04,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_09_optim_states.pt. +18: [2023-05-25 13:38:04,539] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 149 +17: [2023-05-25 13:38:04,539] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 143 +20: [2023-05-25 13:38:04,542] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 161 +18: [2023-05-25 13:38:04,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_10_optim_states.pt. +18: [2023-05-25 13:38:04,543] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 150 +21: [2023-05-25 13:38:04,544] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 171 +16: [2023-05-25 13:38:04,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_10_optim_states.pt. +16: [2023-05-25 13:38:04,545] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 130 +11: [2023-05-25 13:38:04,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_06_optim_states.pt. +11: [2023-05-25 13:38:04,549] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 94 +18: [2023-05-25 13:38:04,552] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 149 + 9: [2023-05-25 13:38:04,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_04_optim_states.pt. + 9: [2023-05-25 13:38:04,552] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 72 +14: [2023-05-25 13:38:04,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_06_optim_states.pt. +14: [2023-05-25 13:38:04,554] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 114 +18: [2023-05-25 13:38:04,558] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 150 +22: [2023-05-25 13:38:04,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_10_optim_states.pt. +22: [2023-05-25 13:38:04,560] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 182 +16: [2023-05-25 13:38:04,560] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 130 +11: [2023-05-25 13:38:04,563] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 94 +14: [2023-05-25 13:38:04,567] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 114 + 9: [2023-05-25 13:38:04,567] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 72 +16: [2023-05-25 13:38:04,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_09_optim_states.pt. +16: [2023-05-25 13:38:04,567] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 133 +19: [2023-05-25 13:38:04,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_09_optim_states.pt. +19: [2023-05-25 13:38:04,569] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 157 +14: [2023-05-25 13:38:04,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_07_optim_states.pt. +14: [2023-05-25 13:38:04,569] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 119 +12: [2023-05-25 13:38:04,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_05_optim_states.pt. +12: [2023-05-25 13:38:04,570] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 97 +22: [2023-05-25 13:38:04,572] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 182 +16: [2023-05-25 13:38:04,579] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 133 +10: [2023-05-25 13:38:04,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_06_optim_states.pt. +10: [2023-05-25 13:38:04,583] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 86 +19: [2023-05-25 13:38:04,583] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 157 +12: [2023-05-25 13:38:04,584] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 97 +14: [2023-05-25 13:38:04,585] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 119 +23: [2023-05-25 13:38:04,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_11_optim_states.pt. +23: [2023-05-25 13:38:04,586] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 187 +20: [2023-05-25 13:38:04,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_09_optim_states.pt. +20: [2023-05-25 13:38:04,590] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 165 +18: [2023-05-25 13:38:04,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_11_optim_states.pt. +18: [2023-05-25 13:38:04,597] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 151 +10: [2023-05-25 13:38:04,598] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 86 +23: [2023-05-25 13:38:04,602] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 187 +20: [2023-05-25 13:38:04,604] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 165 +18: [2023-05-25 13:38:04,611] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 151 +19: [2023-05-25 13:38:04,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_09_optim_states.pt. +19: [2023-05-25 13:38:04,613] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 153 +22: [2023-05-25 13:38:04,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_09_optim_states.pt. +22: [2023-05-25 13:38:04,619] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 181 +22: [2023-05-25 13:38:04,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_11_optim_states.pt. +22: [2023-05-25 13:38:04,620] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 183 +15: [2023-05-25 13:38:04,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_07_optim_states.pt. +15: [2023-05-25 13:38:04,627] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 127 +19: [2023-05-25 13:38:04,628] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 153 +22: [2023-05-25 13:38:04,633] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 181 +16: [2023-05-25 13:38:04,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_11_optim_states.pt. +16: [2023-05-25 13:38:04,635] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 135 +22: [2023-05-25 13:38:04,636] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 183 +11: [2023-05-25 13:38:04,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_07_optim_states.pt. +13: [2023-05-25 13:38:04,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_07_optim_states.pt. +11: [2023-05-25 13:38:04,636] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 91 +13: [2023-05-25 13:38:04,636] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 111 + 8: [2023-05-25 13:38:04,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_07_optim_states.pt. + 8: [2023-05-25 13:38:04,638] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 67 +10: [2023-05-25 13:38:04,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_07_optim_states.pt. +10: [2023-05-25 13:38:04,640] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 83 +15: [2023-05-25 13:38:04,640] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 127 + 9: [2023-05-25 13:38:04,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_05_optim_states.pt. + 9: [2023-05-25 13:38:04,640] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 73 +17: [2023-05-25 13:38:04,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_09_optim_states.pt. +17: [2023-05-25 13:38:04,645] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 141 +13: [2023-05-25 13:38:04,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_05_optim_states.pt. +13: [2023-05-25 13:38:04,646] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 109 +11: [2023-05-25 13:38:04,650] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 91 + 0: [2023-05-25 13:38:04,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. + 0: [2023-05-25 13:38:04,650] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 0 +16: [2023-05-25 13:38:04,650] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 135 +13: [2023-05-25 13:38:04,652] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 111 + 8: [2023-05-25 13:38:04,653] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 67 + 9: [2023-05-25 13:38:04,654] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 73 +10: [2023-05-25 13:38:04,654] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 83 +17: [2023-05-25 13:38:04,657] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 141 +13: [2023-05-25 13:38:04,660] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 109 + 0: [2023-05-25 13:38:04,664] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 0 +21: [2023-05-25 13:38:04,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_09_optim_states.pt. +21: [2023-05-25 13:38:04,667] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 173 + 8: [2023-05-25 13:38:04,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_06_optim_states.pt. + 8: [2023-05-25 13:38:04,675] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 66 + 0: could not find arguments in the checkpoint ... + 0: checkpoint version 3.0 +21: [2023-05-25 13:38:04,683] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 173 +15: [2023-05-25 13:38:04,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_07_optim_states.pt. +15: [2023-05-25 13:38:04,685] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 123 +14: [2023-05-25 13:38:04,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_07_optim_states.pt. +14: [2023-05-25 13:38:04,687] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 115 + 8: [2023-05-25 13:38:04,688] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 66 +14: [2023-05-25 13:38:04,700] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 115 +15: [2023-05-25 13:38:04,701] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 123 + 0: [2023-05-25 13:38:04,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_03_optim_states.pt. + 0: [2023-05-25 13:38:04,707] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 3 + 9: [2023-05-25 13:38:04,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_06_optim_states.pt. + 9: [2023-05-25 13:38:04,709] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 78 + 0: [2023-05-25 13:38:04,722] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 3 +16: [2023-05-25 13:38:04,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_09_optim_states.pt. +16: [2023-05-25 13:38:04,722] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 129 + 9: [2023-05-25 13:38:04,726] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 78 +23: [2023-05-25 13:38:04,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_10_optim_states.pt. +23: [2023-05-25 13:38:04,727] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 186 +18: [2023-05-25 13:38:04,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_11_optim_states.pt. +18: [2023-05-25 13:38:04,727] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 147 +16: [2023-05-25 13:38:04,738] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 129 +18: [2023-05-25 13:38:04,740] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 147 +23: [2023-05-25 13:38:04,743] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 186 + 4: [2023-05-25 13:38:04,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. + 4: [2023-05-25 13:38:04,750] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 32 +22: [2023-05-25 13:38:04,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_11_optim_states.pt. +22: [2023-05-25 13:38:04,754] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 179 + 5: [2023-05-25 13:38:04,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. + 5: [2023-05-25 13:38:04,758] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 40 +22: [2023-05-25 13:38:04,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_10_optim_states.pt. +22: [2023-05-25 13:38:04,758] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 178 + 4: [2023-05-25 13:38:04,764] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 32 +13: [2023-05-25 13:38:04,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_07_optim_states.pt. +13: [2023-05-25 13:38:04,766] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 107 +22: [2023-05-25 13:38:04,770] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 179 +22: [2023-05-25 13:38:04,771] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 178 +23: [2023-05-25 13:38:04,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_11_optim_states.pt. +23: [2023-05-25 13:38:04,772] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 191 + 5: [2023-05-25 13:38:04,773] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 40 + 0: [2023-05-25 13:38:04,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. + 0: [2023-05-25 13:38:04,777] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 4 + 5: [2023-05-25 13:38:04,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_03_optim_states.pt. + 5: [2023-05-25 13:38:04,779] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 43 +13: [2023-05-25 13:38:04,780] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 107 +23: [2023-05-25 13:38:04,783] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 191 + 0: [2023-05-25 13:38:04,790] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 4 + 5: [2023-05-25 13:38:04,796] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 43 +13: [2023-05-25 13:38:04,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_06_optim_states.pt. +13: [2023-05-25 13:38:04,852] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 110 + 3: [2023-05-25 13:38:04,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. + 3: [2023-05-25 13:38:04,861] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 24 +13: [2023-05-25 13:38:04,867] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 110 +17: [2023-05-25 13:38:04,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_09_optim_states.pt. +17: [2023-05-25 13:38:04,872] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 137 + 3: [2023-05-25 13:38:04,878] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 24 +17: [2023-05-25 13:38:04,884] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 137 + 3: [2023-05-25 13:38:04,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_03_optim_states.pt. + 3: [2023-05-25 13:38:04,891] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 27 + 1: [2023-05-25 13:38:04,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. + 1: [2023-05-25 13:38:04,893] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 12 + 3: [2023-05-25 13:38:04,907] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 27 + 1: [2023-05-25 13:38:04,908] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 12 + 5: [2023-05-25 13:38:04,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt. + 5: [2023-05-25 13:38:04,918] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 41 + 0: [2023-05-25 13:38:04,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_03_optim_states.pt. + 0: [2023-05-25 13:38:04,924] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 7 + 6: [2023-05-25 13:38:04,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_01_optim_states.pt. + 6: [2023-05-25 13:38:04,924] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 53 + 5: [2023-05-25 13:38:04,936] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 41 + 7: [2023-05-25 13:38:04,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_01_optim_states.pt. + 7: [2023-05-25 13:38:04,938] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 57 + 6: [2023-05-25 13:38:04,939] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 53 + 2: [2023-05-25 13:38:04,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. + 2: [2023-05-25 13:38:04,939] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 16 + 7: [2023-05-25 13:38:04,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_02_optim_states.pt. + 5: [2023-05-25 13:38:04,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. + 7: [2023-05-25 13:38:04,939] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 62 + 5: [2023-05-25 13:38:04,939] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 44 + 0: [2023-05-25 13:38:04,939] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 7 + 2: [2023-05-25 13:38:04,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_02_optim_states.pt. + 2: [2023-05-25 13:38:04,942] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 18 + 4: [2023-05-25 13:38:04,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_02_optim_states.pt. + 4: [2023-05-25 13:38:04,950] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 38 + 4: [2023-05-25 13:38:04,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_03_optim_states.pt. + 4: [2023-05-25 13:38:04,950] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 39 + 4: [2023-05-25 13:38:04,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. + 4: [2023-05-25 13:38:04,953] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 36 + 7: [2023-05-25 13:38:04,954] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 57 + 5: [2023-05-25 13:38:04,954] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 44 + 2: [2023-05-25 13:38:04,954] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 16 + 7: [2023-05-25 13:38:04,954] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 62 +21: [2023-05-25 13:38:04,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_11_optim_states.pt. +21: [2023-05-25 13:38:04,957] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 175 + 2: [2023-05-25 13:38:04,957] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 18 + 4: [2023-05-25 13:38:04,967] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 39 + 4: [2023-05-25 13:38:04,967] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 38 +21: [2023-05-25 13:38:04,970] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 175 + 4: [2023-05-25 13:38:04,970] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 36 + 6: [2023-05-25 13:38:04,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_01_optim_states.pt. + 6: [2023-05-25 13:38:04,971] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 49 + 6: [2023-05-25 13:38:04,989] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 49 + 6: [2023-05-25 13:38:04,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. + 6: [2023-05-25 13:38:04,991] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 48 + 3: [2023-05-25 13:38:04,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. + 3: [2023-05-25 13:38:04,999] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 28 + 0: [2023-05-25 13:38:05,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_01_optim_states.pt. + 0: [2023-05-25 13:38:05,006] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 5 + 6: [2023-05-25 13:38:05,006] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 48 + 3: [2023-05-25 13:38:05,014] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 28 + 0: [2023-05-25 13:38:05,021] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 5 + 0: [2023-05-25 13:38:05,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt. + 0: [2023-05-25 13:38:05,026] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 1 + 1: [2023-05-25 13:38:05,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. + 1: [2023-05-25 13:38:05,029] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 8 + 0: [2023-05-25 13:38:05,042] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 1 + 1: [2023-05-25 13:38:05,043] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 8 + 6: [2023-05-25 13:38:05,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_03_optim_states.pt. + 6: [2023-05-25 13:38:05,044] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 51 + 6: [2023-05-25 13:38:05,059] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 51 + 5: [2023-05-25 13:38:05,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_03_optim_states.pt. + 5: [2023-05-25 13:38:05,064] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 47 + 7: [2023-05-25 13:38:05,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_01_optim_states.pt. + 7: [2023-05-25 13:38:05,065] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 61 + 4: [2023-05-25 13:38:05,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_9_mp_rank_01_optim_states.pt. + 4: [2023-05-25 13:38:05,072] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 37 + 7: [2023-05-25 13:38:05,081] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 61 + 5: [2023-05-25 13:38:05,081] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 47 + 3: [2023-05-25 13:38:05,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_01_optim_states.pt. + 3: [2023-05-25 13:38:05,086] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 29 + 4: [2023-05-25 13:38:05,087] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 37 + 4: [2023-05-25 13:38:05,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_03_optim_states.pt. + 4: [2023-05-25 13:38:05,096] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 35 + 3: [2023-05-25 13:38:05,101] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 29 + 4: [2023-05-25 13:38:05,110] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 35 + 2: [2023-05-25 13:38:05,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. + 2: [2023-05-25 13:38:05,111] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 20 + 6: [2023-05-25 13:38:05,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_02_optim_states.pt. + 6: [2023-05-25 13:38:05,113] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 54 + 1: [2023-05-25 13:38:05,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_02_optim_states.pt. + 1: [2023-05-25 13:38:05,115] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 10 + 0: [2023-05-25 13:38:05,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_1_mp_rank_02_optim_states.pt. + 0: [2023-05-25 13:38:05,123] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 6 + 6: [2023-05-25 13:38:05,129] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 54 + 2: [2023-05-25 13:38:05,130] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 20 + 1: [2023-05-25 13:38:05,130] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 10 + 2: [2023-05-25 13:38:05,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_02_optim_states.pt. + 2: [2023-05-25 13:38:05,132] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 22 + 4: [2023-05-25 13:38:05,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_01_optim_states.pt. + 4: [2023-05-25 13:38:05,134] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 33 + 0: [2023-05-25 13:38:05,139] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 6 + 2: [2023-05-25 13:38:05,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_03_optim_states.pt. + 2: [2023-05-25 13:38:05,144] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 19 + 2: [2023-05-25 13:38:05,146] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 22 + 4: [2023-05-25 13:38:05,149] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 33 + 5: [2023-05-25 13:38:05,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_10_mp_rank_02_optim_states.pt. + 5: [2023-05-25 13:38:05,155] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 42 + 1: [2023-05-25 13:38:05,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_01_optim_states.pt. + 1: [2023-05-25 13:38:05,156] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 13 + 2: [2023-05-25 13:38:05,160] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 19 + 1: [2023-05-25 13:38:05,170] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 13 + 5: [2023-05-25 13:38:05,172] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 42 + 5: [2023-05-25 13:38:05,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt. + 5: [2023-05-25 13:38:05,172] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 45 + 1: [2023-05-25 13:38:05,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_03_optim_states.pt. + 1: [2023-05-25 13:38:05,175] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 11 + 1: [2023-05-25 13:38:05,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_02_optim_states.pt. + 1: [2023-05-25 13:38:05,179] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 14 + 7: [2023-05-25 13:38:05,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. + 7: [2023-05-25 13:38:05,183] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 60 + 6: [2023-05-25 13:38:05,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_12_mp_rank_02_optim_states.pt. + 6: [2023-05-25 13:38:05,186] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 50 + 6: [2023-05-25 13:38:05,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_13_mp_rank_03_optim_states.pt. + 6: [2023-05-25 13:38:05,187] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 55 + 5: [2023-05-25 13:38:05,188] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 45 + 1: [2023-05-25 13:38:05,189] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 11 + 2: [2023-05-25 13:38:05,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_03_optim_states.pt. + 2: [2023-05-25 13:38:05,190] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 23 + 1: [2023-05-25 13:38:05,194] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 14 + 2: [2023-05-25 13:38:05,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_4_mp_rank_01_optim_states.pt. + 2: [2023-05-25 13:38:05,195] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 17 + 7: [2023-05-25 13:38:05,198] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 60 + 6: [2023-05-25 13:38:05,202] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 50 + 6: [2023-05-25 13:38:05,205] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 55 + 2: [2023-05-25 13:38:05,208] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 23 + 2: [2023-05-25 13:38:05,211] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 17 + 5: [2023-05-25 13:38:05,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_11_mp_rank_02_optim_states.pt. + 5: [2023-05-25 13:38:05,219] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 46 + 5: [2023-05-25 13:38:05,235] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 46 + 3: [2023-05-25 13:38:05,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_02_optim_states.pt. + 3: [2023-05-25 13:38:05,242] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 30 + 4: [2023-05-25 13:38:05,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_8_mp_rank_02_optim_states.pt. + 4: [2023-05-25 13:38:05,254] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 34 + 3: [2023-05-25 13:38:05,256] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 30 + 4: [2023-05-25 13:38:05,271] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 34 + 0: [2023-05-25 13:38:05,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_0_mp_rank_02_optim_states.pt. + 0: [2023-05-25 13:38:05,291] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 2 + 3: [2023-05-25 13:38:05,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_02_optim_states.pt. + 3: [2023-05-25 13:38:05,298] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 26 + 1: [2023-05-25 13:38:05,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_3_mp_rank_03_optim_states.pt. + 1: [2023-05-25 13:38:05,298] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 15 + 7: [2023-05-25 13:38:05,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_15_mp_rank_03_optim_states.pt. + 7: [2023-05-25 13:38:05,300] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 63 + 0: [2023-05-25 13:38:05,309] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 2 + 3: [2023-05-25 13:38:05,313] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 26 + 1: [2023-05-25 13:38:05,315] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 15 + 3: [2023-05-25 13:38:05,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_6_mp_rank_01_optim_states.pt. + 3: [2023-05-25 13:38:05,316] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 25 + 7: [2023-05-25 13:38:05,316] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 63 + 3: [2023-05-25 13:38:05,331] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 25 + 3: [2023-05-25 13:38:05,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_7_mp_rank_03_optim_states.pt. + 3: [2023-05-25 13:38:05,338] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 31 + 7: [2023-05-25 13:38:05,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_02_optim_states.pt. + 7: [2023-05-25 13:38:05,350] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 58 + 3: [2023-05-25 13:38:05,351] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 31 + 7: [2023-05-25 13:38:05,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_14_mp_rank_03_optim_states.pt. + 7: [2023-05-25 13:38:05,352] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 59 + 2: [2023-05-25 13:38:05,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_5_mp_rank_01_optim_states.pt. + 2: [2023-05-25 13:38:05,364] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 21 + 7: [2023-05-25 13:38:05,365] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 58 + 7: [2023-05-25 13:38:05,368] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 59 + 2: [2023-05-25 13:38:05,381] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 21 + 1: [2023-05-25 13:38:05,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b/global_step84877/bf16_zero_pp_rank_2_mp_rank_01_optim_states.pt. + 1: [2023-05-25 13:38:05,386] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 9 + 1: [2023-05-25 13:38:05,402] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 9 + 0: successfully loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b4b at iteration 0 +31: time (ms) | load-checkpoint: 6084.62 + 0: estimated model parameters: 9.828646912 + 0: estimated model parameters without embeddings: 8.863956992 + 0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-05-25 13:38:06 + 0: > building train, validation, and test datasets ... + 0: > datasets target sizes (minimum size): + 0: train: 1 + 0: validation: 51200 + 0: test: 51200 + 0: > building train, validation, and test datasets for GPT ... + 0: > building dataset index ... + 0: reading sizes... + 0: reading pointers... + 0: reading document index... + 0: creating numpy buffer of mmap... + 0: creating memory view of numpy buffer... + 0: > finished creating indexed dataset in 0.066274 seconds + 0: number of documents: 835726 + 0: > dataset split: + 0: train: + 0: document indices in [0, 835726) total of 835726 documents + 0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy + 0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy + 0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy + 0: loaded indexed file in 0.057 seconds + 0: total number of samples: 195101 + 0: total number of epochs: 1 + 0: > building dataset index ... + 0: reading sizes... + 0: reading pointers... + 0: reading document index... + 0: creating numpy buffer of mmap... + 0: creating memory view of numpy buffer... + 0: > finished creating indexed dataset in 0.033319 seconds + 0: number of documents: 364608 + 0: > dataset split: + 0: validation: + 0: document indices in [0, 364608) total of 364608 documents + 0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_51200ns_2048sl_1234s_doc_idx.npy + 0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_51200ns_2048sl_1234s_sample_idx.npy + 0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_51200ns_2048sl_1234s_shuffle_idx.npy + 0: loaded indexed file in 0.090 seconds + 0: total number of samples: 84978 + 0: total number of epochs: 1 + 0: > finished creating GPT datasets ... + 0: [after dataloaders are built] datetime: 2023-05-25 13:38:11 + 0: done with setup ... + 0: training ... +31: time (ms) | model-and-optimizer-setup: 14533.34 | train/valid/test-data-iterators-setup: 1635.27 + 0: [after training is done] datetime: 2023-05-25 13:38:11 +31: ----------------------------------------------------------------------------------------------------------------- +31: validation loss at the end of training for val data | lm loss value: 3.131161E+00 | lm loss PPL: 2.290056E+01 | +31: ----------------------------------------------------------------------------------------------------------------- +END 3583607: Thu 25 May 2023 01:41:39 PM EEST diff --git a/8b7178b4b/latest b/8b7178b4b/latest new file mode 100644 index 0000000000000000000000000000000000000000..7d2b36fef5477c4b300ce92aa705d2451bb96c90 --- /dev/null +++ b/8b7178b4b/latest @@ -0,0 +1 @@ +global_step84877 diff --git a/8b7178b4b/sbatch_8b7178b4b.sh b/8b7178b4b/sbatch_8b7178b4b.sh new file mode 100755 index 0000000000000000000000000000000000000000..f2000335a525140d251bb9814f3592ecd07d15f4 --- /dev/null +++ b/8b7178b4b/sbatch_8b7178b4b.sh @@ -0,0 +1,165 @@ +#!/bin/bash +#SBATCH --exclude=nid007542 +#SBATCH --nodes=64 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=8b7178b4b + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" + +TRAIN_DATA_PATH=train4b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_4B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=2 +TP_SIZE=2 + +MICRO_BATCH_SIZE=2 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_9293M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=5000 + +# Tokens: 178000000000 +# -> Samples: 86914062 +TRAIN_SAMPLES=86_914_062 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 869_140 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/8b7178b4b/sbatch_8b7178b4bfast.sh b/8b7178b4b/sbatch_8b7178b4bfast.sh new file mode 100755 index 0000000000000000000000000000000000000000..18dfc830fd82d8f7850441449d6df337c31259f1 --- /dev/null +++ b/8b7178b4b/sbatch_8b7178b4bfast.sh @@ -0,0 +1,165 @@ +#!/bin/bash +#SBATCH --exclude=nid007542 +#SBATCH --nodes=32 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=8b7178b4bfast + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" + +TRAIN_DATA_PATH=train4b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_4B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=4 +TP_SIZE=4 + +MICRO_BATCH_SIZE=1 +GRADIENT_ACCUMULATION_STEPS=4 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_9293M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=5000 + +# Tokens: 178000000000 +# -> Samples: 86914062 +TRAIN_SAMPLES=86_914_062 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 869_140 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/8b7178b4b/sbatch_8b7178b4bval.sh b/8b7178b4b/sbatch_8b7178b4bval.sh new file mode 100644 index 0000000000000000000000000000000000000000..c6f9e37ff9f312732067982570d3b0600ae7d3af --- /dev/null +++ b/8b7178b4b/sbatch_8b7178b4bval.sh @@ -0,0 +1,172 @@ +#!/bin/bash +#SBATCH --exclude=nid007542 +#SBATCH --nodes=32 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=8b7178b4bval +VARIANT_CKPT=lm1-8b7-178b-c4-repetitions/8b7178b4b + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" + +TRAIN_DATA_PATH=train400m.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_12B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + +PP_SIZE=4 +TP_SIZE=4 + +MICRO_BATCH_SIZE=1 +GRADIENT_ACCUMULATION_STEPS=2 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_9293M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=5000 + +# Tokens: 11522010000 +# -> Samples: 5625981 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --override-lr-scheduler \ + --reset-progress \ + --no-load-optim \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + --eval-only true \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + --num-workers 0 \ + --valid-num-workers 0 \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" + diff --git a/8b7178b4b/sbatch_8b74b4bfast.sh b/8b7178b4b/sbatch_8b74b4bfast.sh new file mode 100755 index 0000000000000000000000000000000000000000..b9bfd4dce2483e113956b2e3550940da6c408ff8 --- /dev/null +++ b/8b7178b4b/sbatch_8b74b4bfast.sh @@ -0,0 +1,167 @@ +#!/bin/bash +#SBATCH --exclude=nid007542 +#SBATCH --nodes=32 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=8b74b4bfast + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" + +TRAIN_DATA_PATH=train4b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_4B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=4 +TP_SIZE=4 + +MICRO_BATCH_SIZE=1 +GRADIENT_ACCUMULATION_STEPS=4 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_9293M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=5000 + +# Tokens: 178000000000 +# -> Samples: 86914062 +#TRAIN_SAMPLES=86_914_062 +TRAIN_SAMPLES=1_953_125 + + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 19_531 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/8b7178b4b/tensorboard_8b7178b4bval/events.out.tfevents.1685008747.nid006608.20122.0 b/8b7178b4b/tensorboard_8b7178b4bval/events.out.tfevents.1685008747.nid006608.20122.0 new file mode 100644 index 0000000000000000000000000000000000000000..79fb451444ed64bb13f44d9b650e10f72caba51b --- /dev/null +++ b/8b7178b4b/tensorboard_8b7178b4bval/events.out.tfevents.1685008747.nid006608.20122.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44b3397c4547c677f7bdce9f66a40e37263ed048fa362ee885814195f20361c9 +size 980 diff --git a/8b7178b4b/tensorboard_8b7178b4bval/events.out.tfevents.1685011041.nid006136.75070.0 b/8b7178b4b/tensorboard_8b7178b4bval/events.out.tfevents.1685011041.nid006136.75070.0 new file mode 100644 index 0000000000000000000000000000000000000000..7df6bd747607b427c02cbf63a66ff6d04aa9afcf --- /dev/null +++ b/8b7178b4b/tensorboard_8b7178b4bval/events.out.tfevents.1685011041.nid006136.75070.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad962a70a4df4573b0a1ff3ab6f928823c212e4172ba688fb76609da0824d90d +size 980